<html><head><meta http-equiv="Content-Type" content="text/html charset=utf-8"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class="">Hi Ahmed,<div class=""><br class=""></div><div class="">I’ve bisected an instruction selection failure with the old vector shuffle to this commit: <a href="http://llvm.org/bugs/show_bug.cgi?id=21876" class="">http://llvm.org/bugs/show_bug.cgi?id=21876</a></div><div class=""><br class=""></div><div class="">Would you mind having a look?</div><div class=""><br class=""></div><div class="">Thanks,</div><div class="">-Quentin<br class=""><div><blockquote type="cite" class=""><div class="">On Dec 5, 2014, at 5:31 PM, Ahmed Bougacha <<a href="mailto:ahmed.bougacha@gmail.com" class="">ahmed.bougacha@gmail.com</a>> wrote:</div><br class="Apple-interchange-newline"><div class="">Author: ab<br class="">Date: Fri Dec  5 19:31:07 2014<br class="">New Revision: 223567<br class=""><br class="">URL: <a href="http://llvm.org/viewvc/llvm-project?rev=223567&view=rev" class="">http://llvm.org/viewvc/llvm-project?rev=223567&view=rev</a><br class="">Log:<br class="">[X86] Refactor PMOV[SZ]Xrm to add missing AVX2 patterns.<br class=""><br class="">Most patterns will go away once the extload legalization changes land.<br class=""><br class="">Differential Revision: <a href="http://reviews.llvm.org/D6125" class="">http://reviews.llvm.org/D6125</a><br class=""><br class="">Added:<br class="">    llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll<br class="">    llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll<br class="">Modified:<br class="">    llvm/trunk/lib/Target/X86/X86InstrSSE.td<br class="">    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h<br class="">    llvm/trunk/test/CodeGen/X86/vector-sext.ll<br class="">    llvm/trunk/test/CodeGen/X86/vector-zext.ll<br class=""><br class="">Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=223567&r1=223566&r2=223567&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=223567&r1=223566&r2=223567&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)<br class="">+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Dec  5 19:31:07 2014<br class="">@@ -6094,552 +6094,223 @@ def : InstAlias<"monitor\t{%rax, %rcx, %<br class=""> // SSE4.1 - Packed Move with Sign/Zero Extend<br class=""> //===----------------------------------------------------------------------===//<br class=""><br class="">-multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId,<br class="">-                               OpndItins itins = DEFAULT_ITINS> {<br class="">-  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),<br class="">+multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,<br class="">+                          RegisterClass OutRC, RegisterClass InRC,<br class="">+                          OpndItins itins> {<br class="">+  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),<br class="">                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-                 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>,<br class="">+                 [], itins.rr>,<br class="">                  Sched<[itins.Sched]>;<br class=""><br class="">-  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),<br class="">+  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),<br class="">                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-       [(set VR128:$dst,<br class="">-         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))],<br class="">-         itins.rm>, Sched<[itins.Sched.Folded]>;<br class="">+                 [],<br class="">+                 itins.rm>, Sched<[itins.Sched.Folded]>;<br class=""> }<br class=""><br class="">-multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,<br class="">-                                 Intrinsic IntId, X86FoldableSchedWrite Sched> {<br class="">-  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),<br class="">-                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-                  [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;<br class="">-<br class="">-  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),<br class="">-                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-                  [(set VR256:$dst, (IntId (load addr:$src)))]>,<br class="">-                  Sched<[Sched.Folded]>;<br class="">+multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,<br class="">+                          X86MemOperand MemOp, X86MemOperand MemYOp,<br class="">+                          OpndItins SSEItins, OpndItins AVXItins,<br class="">+                          OpndItins AVX2Itins> {<br class="">+  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;<br class="">+  let Predicates = [HasAVX] in<br class="">+    defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,<br class="">+                                     VR128, VR128, AVXItins>, VEX;<br class="">+  let Predicates = [HasAVX2] in<br class="">+    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,<br class="">+                                     VR256, VR128, AVX2Itins>, VEX, VEX_L;<br class="">+}<br class="">+<br class="">+multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr,<br class="">+                                X86MemOperand MemOp, X86MemOperand MemYOp> {<br class="">+  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),<br class="">+                                        MemOp, MemYOp,<br class="">+                                        SSE_INTALU_ITINS_SHUFF_P,<br class="">+                                        DEFAULT_ITINS_SHUFFLESCHED,<br class="">+                                        DEFAULT_ITINS_SHUFFLESCHED>;<br class="">+  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),<br class="">+                                        !strconcat("pmovzx", OpcodeStr),<br class="">+                                        MemOp, MemYOp,<br class="">+                                        SSE_INTALU_ITINS_SHUFF_P,<br class="">+                                        DEFAULT_ITINS_SHUFFLESCHED,<br class="">+                                        DEFAULT_ITINS_SHUFFLESCHED>;<br class="">+}<br class="">+<br class="">+defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem>;<br class="">+defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem>;<br class="">+defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem>;<br class="">+<br class="">+defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem>;<br class="">+defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem>;<br class="">+<br class="">+defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;<br class="">+<br class="">+// AVX2 Patterns<br class="">+multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {<br class="">+  // Register-Register patterns<br class="">+  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;<br class="">+  def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;<br class="">+<br class="">+  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (v8i16 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;<br class="">+<br class="">+  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;<br class="">+<br class="">+  // AVX2 Register-Memory patterns<br class="">+  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;<br class="">+  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;<br class="">+  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;<br class="">+  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;<br class="">+  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;<br class="">+  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;<br class="">+  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;<br class="">+  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;<br class="">+  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;<br class="">+  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;<br class="">+  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;<br class=""> }<br class=""><br class="">-let Predicates = [HasAVX] in {<br class="">-defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw",<br class="">-                                     int_x86_sse41_pmovsxbw,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd",<br class="">-                                     int_x86_sse41_pmovsxwd,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq",<br class="">-                                     int_x86_sse41_pmovsxdq,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw",<br class="">-                                     int_x86_sse41_pmovzxbw,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd",<br class="">-                                     int_x86_sse41_pmovzxwd,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq",<br class="">-                                     int_x86_sse41_pmovzxdq,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-}<br class="">-<br class="">-let Predicates = [HasAVX2] in {<br class="">-defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",<br class="">-                                        int_x86_avx2_pmovsxbw,<br class="">-                                        WriteShuffle>, VEX, VEX_L;<br class="">-defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",<br class="">-                                        int_x86_avx2_pmovsxwd,<br class="">-                                        WriteShuffle>, VEX, VEX_L;<br class="">-defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",<br class="">-                                        int_x86_avx2_pmovsxdq,<br class="">-                                        WriteShuffle>, VEX, VEX_L;<br class="">-defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",<br class="">-                                        int_x86_avx2_pmovzxbw,<br class="">-                                        WriteShuffle>, VEX, VEX_L;<br class="">-defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",<br class="">-                                        int_x86_avx2_pmovzxwd,<br class="">-                                        WriteShuffle>, VEX, VEX_L;<br class="">-defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",<br class="">-                                        int_x86_avx2_pmovzxdq,<br class="">-                                        WriteShuffle>, VEX, VEX_L;<br class="">-}<br class="">-<br class="">-defm PMOVSXBW   : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-defm PMOVSXWD   : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-defm PMOVSXDQ   : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-defm PMOVZXBW   : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-defm PMOVZXWD   : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-defm PMOVZXDQ   : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-<br class="">-let Predicates = [HasAVX] in {<br class="">-  // Common patterns involving scalar load.<br class="">-  def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),<br class="">-            (VPMOVSXBWrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),<br class="">-            (VPMOVSXBWrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),<br class="">-            (VPMOVSXBWrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),<br class="">-            (VPMOVSXWDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),<br class="">-            (VPMOVSXWDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),<br class="">-            (VPMOVSXWDrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),<br class="">-            (VPMOVSXDQrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),<br class="">-            (VPMOVSXDQrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),<br class="">-            (VPMOVSXDQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),<br class="">-            (VPMOVZXBWrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),<br class="">-            (VPMOVZXBWrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),<br class="">-            (VPMOVZXBWrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),<br class="">-            (VPMOVZXWDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),<br class="">-            (VPMOVZXWDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),<br class="">-            (VPMOVZXWDrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),<br class="">-            (VPMOVZXDQrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),<br class="">-            (VPMOVZXDQrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),<br class="">-            (VPMOVZXDQrm addr:$src)>;<br class="">-}<br class="">-<br class="">-let Predicates = [UseSSE41] in {<br class="">-  // Common patterns involving scalar load.<br class="">-  def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),<br class="">-            (PMOVSXBWrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),<br class="">-            (PMOVSXBWrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),<br class="">-            (PMOVSXBWrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),<br class="">-            (PMOVSXWDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),<br class="">-            (PMOVSXWDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),<br class="">-            (PMOVSXWDrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),<br class="">-            (PMOVSXDQrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),<br class="">-            (PMOVSXDQrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),<br class="">-            (PMOVSXDQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),<br class="">-            (PMOVZXBWrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),<br class="">-            (PMOVZXBWrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),<br class="">-            (PMOVZXBWrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),<br class="">-            (PMOVZXWDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),<br class="">-            (PMOVZXWDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),<br class="">-            (PMOVZXWDrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),<br class="">-            (PMOVZXDQrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),<br class="">-            (PMOVZXDQrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),<br class="">-            (PMOVZXDQrm addr:$src)>;<br class="">-}<br class="">-<br class="">-multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId,<br class="">-                               OpndItins itins = DEFAULT_ITINS> {<br class="">-  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),<br class="">-                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-                 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>,<br class="">-                 Sched<[itins.Sched]>;<br class="">-<br class="">-  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),<br class="">-                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-       [(set VR128:$dst,<br class="">-         (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))],<br class="">-         itins.rm>, Sched<[itins.Sched.Folded]>;<br class="">-}<br class="">-<br class="">-multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr,<br class="">-                                 Intrinsic IntId, X86FoldableSchedWrite Sched> {<br class="">-  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),<br class="">-                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-                  [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;<br class="">-<br class="">-  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src),<br class="">-                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-       [(set VR256:$dst,<br class="">-         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,<br class="">-         Sched<[Sched.Folded]>;<br class="">-}<br class="">-<br class="">-let Predicates = [HasAVX] in {<br class="">-defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq,<br class="">-                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;<br class="">-}<br class="">-<br class="">-let Predicates = [HasAVX2] in {<br class="">-defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",<br class="">-                                       int_x86_avx2_pmovsxbd, WriteShuffle>,<br class="">-                                       VEX, VEX_L;<br class="">-defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",<br class="">-                                       int_x86_avx2_pmovsxwq, WriteShuffle>,<br class="">-                                       VEX, VEX_L;<br class="">-defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",<br class="">-                                       int_x86_avx2_pmovzxbd, WriteShuffle>,<br class="">-                                       VEX, VEX_L;<br class="">-defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",<br class="">-                                       int_x86_avx2_pmovzxwq, WriteShuffle>,<br class="">-                                       VEX, VEX_L;<br class="">-}<br class="">-<br class="">-defm PMOVSXBD   : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-defm PMOVSXWQ   : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-defm PMOVZXBD   : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-defm PMOVZXWQ   : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq,<br class="">-                                      SSE_INTALU_ITINS_SHUFF_P>;<br class="">-<br class="">-let Predicates = [HasAVX] in {<br class="">-  // Common patterns involving scalar load<br class="">-  def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),<br class="">-            (VPMOVSXBDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),<br class="">-            (VPMOVSXWQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),<br class="">-            (VPMOVZXBDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),<br class="">-            (VPMOVZXWQrm addr:$src)>;<br class="">-}<br class="">-<br class="">-let Predicates = [UseSSE41] in {<br class="">-  // Common patterns involving scalar load<br class="">-  def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),<br class="">-            (PMOVSXBDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),<br class="">-            (PMOVSXWQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),<br class="">-            (PMOVZXBDrm addr:$src)>;<br class="">-  def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),<br class="">-            (PMOVZXWQrm addr:$src)>;<br class="">-}<br class="">-<br class="">-multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId,<br class="">-                               X86FoldableSchedWrite Sched> {<br class="">-  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),<br class="">-                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-                 [(set VR128:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;<br class="">-<br class="">-  // Expecting a i16 load any extended to i32 value.<br class="">-  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),<br class="">-                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-                 [(set VR128:$dst, (IntId (bitconvert<br class="">-                     (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,<br class="">-                 Sched<[Sched.Folded]>;<br class="">-}<br class="">-<br class="">-multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,<br class="">-                                 Intrinsic IntId, X86FoldableSchedWrite Sched> {<br class="">-  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),<br class="">-                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-                 [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;<br class="">-<br class="">-  // Expecting a i16 load any extended to i32 value.<br class="">-  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src),<br class="">-                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br class="">-                  [(set VR256:$dst, (IntId (bitconvert<br class="">-                      (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,<br class="">-                 Sched<[Sched.Folded]>;<br class="">-}<br class="">-<br class="">-let Predicates = [HasAVX] in {<br class="">-defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq,<br class="">-                                     WriteShuffle>, VEX;<br class="">-defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq,<br class="">-                                     WriteShuffle>, VEX;<br class="">-}<br class=""> let Predicates = [HasAVX2] in {<br class="">-defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", int_x86_avx2_pmovsxbq,<br class="">-                                       WriteShuffle>, VEX, VEX_L;<br class="">-defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", int_x86_avx2_pmovzxbq,<br class="">-                                       WriteShuffle>, VEX, VEX_L;<br class="">-}<br class="">-defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq,<br class="">-                                      WriteShuffle>;<br class="">-defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq,<br class="">-                                      WriteShuffle>;<br class="">-<br class="">-let Predicates = [HasAVX2] in {<br class="">-  def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;<br class="">-  def : Pat<(v8i32  (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;<br class="">-  def : Pat<(v4i64  (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v8i32  (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;<br class="">-  def : Pat<(v4i64  (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v4i64  (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),<br class="">-            (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-  def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),<br class="">-            (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-  def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),<br class="">-            (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-<br class="">-  def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),<br class="">-            (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-  def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),<br class="">-            (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-<br class="">-  def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),<br class="">-            (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-<br class="">-  def : Pat<(v8i32 (X86vsext (v8i16 (bitconvert (v2i64 (load addr:$src)))))),<br class="">-            (VPMOVSXWDYrm addr:$src)>;<br class="">-  def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load addr:$src)))))),<br class="">-            (VPMOVSXDQYrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64<br class="">-                    (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (VPMOVSXBDYrm addr:$src)>;<br class="">-  def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64<br class="">-                    (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (VPMOVSXBDYrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64<br class="">-                    (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (VPMOVSXWQYrm addr:$src)>;<br class="">-  def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64<br class="">-                    (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (VPMOVSXWQYrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32<br class="">-                    (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (VPMOVSXBQYrm addr:$src)>;<br class="">-}<br class="">-<br class="">-let Predicates = [HasAVX] in {<br class="">-  // Common patterns involving scalar load<br class="">-  def : Pat<(int_x86_sse41_pmovsxbq<br class="">-              (bitconvert (v4i32 (X86vzmovl<br class="">-                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (VPMOVSXBQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxbq<br class="">-              (bitconvert (v4i32 (X86vzmovl<br class="">-                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (VPMOVZXBQrm addr:$src)>;<br class="">+  defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", X86vsext>;<br class="">+  defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", X86vzext>;<br class=""> }<br class=""><br class="">-let Predicates = [UseSSE41] in {<br class="">-  def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;<br class="">-  def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;<br class="">-<br class="">-  // Common patterns involving scalar load<br class="">-  def : Pat<(int_x86_sse41_pmovsxbq<br class="">-              (bitconvert (v4i32 (X86vzmovl<br class="">-                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (PMOVSXBQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(int_x86_sse41_pmovzxbq<br class="">-              (bitconvert (v4i32 (X86vzmovl<br class="">-                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (PMOVZXBQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64<br class="">-                    (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (PMOVSXWDrm addr:$src)>;<br class="">-  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64<br class="">-                    (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (PMOVSXWDrm addr:$src)>;<br class="">-  def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32<br class="">-                    (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (PMOVSXBDrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32<br class="">-                    (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (PMOVSXWQrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32<br class="">-                    (scalar_to_vector (extloadi32i16 addr:$src))))))),<br class="">-            (PMOVSXBQrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64<br class="">-                    (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (PMOVSXDQrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64<br class="">-                    (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (PMOVSXDQrm addr:$src)>;<br class="">-  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64<br class="">-                    (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (PMOVSXBWrm addr:$src)>;<br class="">-  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64<br class="">-                    (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (PMOVSXBWrm addr:$src)>;<br class="">-}<br class="">-<br class="">-let Predicates = [HasAVX2] in {<br class="">-  def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>;<br class="">-  def : Pat<(v8i32  (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>;<br class="">-  def : Pat<(v4i64  (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v8i32  (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>;<br class="">-  def : Pat<(v4i64  (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v4i64  (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))),<br class="">-            (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-  def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))),<br class="">-            (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-  def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))),<br class="">-            (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-<br class="">-  def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))),<br class="">-            (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-  def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))),<br class="">-            (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">-<br class="">-  def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))),<br class="">-            (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;<br class="">+// SSE4.1/AVX patterns.<br class="">+multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,<br class="">+                                PatFrag ExtLoad16> {<br class="">+  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;<br class="">+  def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;<br class="">+<br class="">+  def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;<br class="">+<br class="">+  def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),<br class="">+            (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;<br class="">+<br class="">+  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;<br class="">+  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;<br class="">+  def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;<br class="">+  def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;<br class="">+  def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;<br class="">+  def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;<br class="">+  def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;<br class="">+  def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;<br class="">+  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;<br class="">+  def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;<br class="">+  def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;<br class="">+  def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;<br class="">+<br class="">+  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),<br class="">+            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;<br class="">+  def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),<br class="">+            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;<br class=""> }<br class=""><br class=""> let Predicates = [HasAVX] in {<br class="">-  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>;<br class="">-  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (VPMOVZXBWrm addr:$src)>;<br class="">-  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (VPMOVZXBWrm addr:$src)>;<br class="">-  def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (VPMOVZXBDrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),<br class="">-            (VPMOVZXBQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (VPMOVZXWDrm addr:$src)>;<br class="">-  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (VPMOVZXWDrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (VPMOVZXWQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (VPMOVZXDQrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (VPMOVZXDQrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),<br class="">-            (VPMOVZXDQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;<br class="">-  def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64<br class="">-                    (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (VPMOVSXWDrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64<br class="">-                    (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (VPMOVSXDQrm addr:$src)>;<br class="">-  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64<br class="">-                    (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (VPMOVSXWDrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64<br class="">-                    (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (VPMOVSXDQrm addr:$src)>;<br class="">-  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64<br class="">-                    (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (VPMOVSXBWrm addr:$src)>;<br class="">-  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64<br class="">-                    (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (VPMOVSXBWrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32<br class="">-                    (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (VPMOVSXBDrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32<br class="">-                    (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (VPMOVSXWQrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32<br class="">-                    (scalar_to_vector (extloadi32i16 addr:$src))))))),<br class="">-            (VPMOVSXBQrm addr:$src)>;<br class="">+  defm : SS41I_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;<br class="">+  defm : SS41I_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;<br class=""> }<br class=""><br class=""> let Predicates = [UseSSE41] in {<br class="">-  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>;<br class="">-  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr VR128:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>;<br class="">-<br class="">-  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (PMOVZXBWrm addr:$src)>;<br class="">-  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (PMOVZXBWrm addr:$src)>;<br class="">-  def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (PMOVZXBDrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))),<br class="">-            (PMOVZXBQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (PMOVZXWDrm addr:$src)>;<br class="">-  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (PMOVZXWDrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),<br class="">-            (PMOVZXWQrm addr:$src)>;<br class="">-<br class="">-  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),<br class="">-            (PMOVZXDQrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))),<br class="">-            (PMOVZXDQrm addr:$src)>;<br class="">-  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),<br class="">-            (PMOVZXDQrm addr:$src)>;<br class="">+  defm : SS41I_pmovx_patterns<"PMOVSX", X86vsext, extloadi32i16>;<br class="">+  defm : SS41I_pmovx_patterns<"PMOVZX", X86vzext, loadi16_anyext>;<br class=""> }<br class=""><br class=""> //===----------------------------------------------------------------------===//<br class=""><br class="">Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=223567&r1=223566&r2=223567&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=223567&r1=223566&r2=223567&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)<br class="">+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Fri Dec  5 19:31:07 2014<br class="">@@ -138,6 +138,18 @@ static const IntrinsicData  IntrinsicsWi<br class="">   X86_INTRINSIC_DATA(avx2_pminu_b,      INTR_TYPE_2OP, X86ISD::UMIN, 0),<br class="">   X86_INTRINSIC_DATA(avx2_pminu_d,      INTR_TYPE_2OP, X86ISD::UMIN, 0),<br class="">   X86_INTRINSIC_DATA(avx2_pminu_w,      INTR_TYPE_2OP, X86ISD::UMIN, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovsxbd,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovsxbq,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovsxbw,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovsxdq,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovsxwd,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovsxwq,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovzxbd,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovzxbq,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovzxbw,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovzxdq,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovzxwd,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(avx2_pmovzxwq,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">   X86_INTRINSIC_DATA(avx2_psll_d,       INTR_TYPE_2OP, X86ISD::VSHL, 0),<br class="">   X86_INTRINSIC_DATA(avx2_psll_q,       INTR_TYPE_2OP, X86ISD::VSHL, 0),<br class="">   X86_INTRINSIC_DATA(avx2_psll_w,       INTR_TYPE_2OP, X86ISD::VSHL, 0),<br class="">@@ -284,6 +296,18 @@ static const IntrinsicData  IntrinsicsWi<br class="">   X86_INTRINSIC_DATA(sse41_pminsd,      INTR_TYPE_2OP, X86ISD::SMIN, 0),<br class="">   X86_INTRINSIC_DATA(sse41_pminud,      INTR_TYPE_2OP, X86ISD::UMIN, 0),<br class="">   X86_INTRINSIC_DATA(sse41_pminuw,      INTR_TYPE_2OP, X86ISD::UMIN, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovsxbd,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovsxbq,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovsxbw,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovsxdq,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovsxwd,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovsxwq,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovzxbd,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovzxbq,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovzxbw,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovzxdq,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovzxwd,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">+  X86_INTRINSIC_DATA(sse41_pmovzxwq,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),<br class="">   X86_INTRINSIC_DATA(sse_comieq_ss,     COMI, X86ISD::COMI, ISD::SETEQ),<br class="">   X86_INTRINSIC_DATA(sse_comige_ss,     COMI, X86ISD::COMI, ISD::SETGE),<br class="">   X86_INTRINSIC_DATA(sse_comigt_ss,     COMI, X86ISD::COMI, ISD::SETGT),<br class=""><br class="">Added: llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll?rev=223567&view=auto" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll?rev=223567&view=auto</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll (added)<br class="">+++ llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll Fri Dec  5 19:31:07 2014<br class="">@@ -0,0 +1,110 @@<br class="">+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s<br class="">+<br class="">+define <16 x i16> @test_lvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_lvm_x86_avx2_pmovsxbw<br class="">+; CHECK: vpmovsxbw (%rdi), %ymm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %1)<br class="">+  ret <16 x i16> %2<br class="">+}<br class="">+<br class="">+define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbd<br class="">+; CHECK: vpmovsxbd (%rdi), %ymm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %1)<br class="">+  ret <8 x i32> %2<br class="">+}<br class="">+<br class="">+define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbq<br class="">+; CHECK: vpmovsxbq (%rdi), %ymm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %1)<br class="">+  ret <4 x i64> %2<br class="">+}<br class="">+<br class="">+define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwd<br class="">+; CHECK: vpmovsxwd (%rdi), %ymm0<br class="">+  %1 = load <8 x i16>* %a, align 1<br class="">+  %2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %1)<br class="">+  ret <8 x i32> %2<br class="">+}<br class="">+<br class="">+define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwq<br class="">+; CHECK: vpmovsxwq (%rdi), %ymm0<br class="">+  %1 = load <8 x i16>* %a, align 1<br class="">+  %2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %1)<br class="">+  ret <4 x i64> %2<br class="">+}<br class="">+<br class="">+define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxdq<br class="">+; CHECK: vpmovsxdq (%rdi), %ymm0<br class="">+  %1 = load <4 x i32>* %a, align 1<br class="">+  %2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %1)<br class="">+  ret <4 x i64> %2<br class="">+}<br class="">+<br class="">+define <16 x i16> @test_lvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_lvm_x86_avx2_pmovzxbw<br class="">+; CHECK: vpmovzxbw (%rdi), %ymm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %1)<br class="">+  ret <16 x i16> %2<br class="">+}<br class="">+<br class="">+define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbd<br class="">+; CHECK: vpmovzxbd (%rdi), %ymm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %1)<br class="">+  ret <8 x i32> %2<br class="">+}<br class="">+<br class="">+define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbq<br class="">+; CHECK: vpmovzxbq (%rdi), %ymm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %1)<br class="">+  ret <4 x i64> %2<br class="">+}<br class="">+<br class="">+define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwd<br class="">+; CHECK: vpmovzxwd (%rdi), %ymm0<br class="">+  %1 = load <8 x i16>* %a, align 1<br class="">+  %2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %1)<br class="">+  ret <8 x i32> %2<br class="">+}<br class="">+<br class="">+define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwq<br class="">+; CHECK: vpmovzxwq (%rdi), %ymm0<br class="">+  %1 = load <8 x i16>* %a, align 1<br class="">+  %2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %1)<br class="">+  ret <4 x i64> %2<br class="">+}<br class="">+<br class="">+define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxdq<br class="">+; CHECK: vpmovzxdq (%rdi), %ymm0<br class="">+  %1 = load <4 x i32>* %a, align 1<br class="">+  %2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %1)<br class="">+  ret <4 x i64> %2<br class="">+}<br class="">+<br class="">+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>)<br class="">+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>)<br class="">+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>)<br class="">+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>)<br class="">+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>)<br class="">+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>)<br class="">+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>)<br class="">+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>)<br class="">+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>)<br class="">+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>)<br class="">+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>)<br class="">+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>)<br class=""><br class="">Added: llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll?rev=223567&view=auto" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll?rev=223567&view=auto</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll (added)<br class="">+++ llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll Fri Dec  5 19:31:07 2014<br class="">@@ -0,0 +1,123 @@<br class="">+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41<br class="">+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX<br class="">+<br class="">+define <8 x i16> @test_llvm_x86_sse41_pmovsxbw(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbw<br class="">+; SSE41: pmovsxbw (%rdi), %xmm0<br class="">+; AVX:  vpmovsxbw (%rdi), %xmm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %1)<br class="">+  ret <8 x i16> %2<br class="">+}<br class="">+<br class="">+define <4 x i32> @test_llvm_x86_sse41_pmovsxbd(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbd<br class="">+; SSE41: pmovsxbd (%rdi), %xmm0<br class="">+; AVX:  vpmovsxbd (%rdi), %xmm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %1)<br class="">+  ret <4 x i32> %2<br class="">+}<br class="">+<br class="">+define <2 x i64> @test_llvm_x86_sse41_pmovsxbq(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbq<br class="">+; SSE41: pmovsxbq (%rdi), %xmm0<br class="">+; AVX:  vpmovsxbq (%rdi), %xmm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %1)<br class="">+  ret <2 x i64> %2<br class="">+}<br class="">+<br class="">+define <4 x i32> @test_llvm_x86_sse41_pmovsxwd(<8 x i16>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwd<br class="">+; SSE41: pmovsxwd (%rdi), %xmm0<br class="">+; AVX:  vpmovsxwd (%rdi), %xmm0<br class="">+  %1 = load <8 x i16>* %a, align 1<br class="">+  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1)<br class="">+  ret <4 x i32> %2<br class="">+}<br class="">+<br class="">+define <2 x i64> @test_llvm_x86_sse41_pmovsxwq(<8 x i16>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwq<br class="">+; SSE41: pmovsxwq (%rdi), %xmm0<br class="">+; AVX:  vpmovsxwq (%rdi), %xmm0<br class="">+  %1 = load <8 x i16>* %a, align 1<br class="">+  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %1)<br class="">+  ret <2 x i64> %2<br class="">+}<br class="">+<br class="">+define <2 x i64> @test_llvm_x86_sse41_pmovsxdq(<4 x i32>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxdq<br class="">+; SSE41: pmovsxdq (%rdi), %xmm0<br class="">+; AVX:  vpmovsxdq (%rdi), %xmm0<br class="">+  %1 = load <4 x i32>* %a, align 1<br class="">+  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %1)<br class="">+  ret <2 x i64> %2<br class="">+}<br class="">+<br class="">+define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbw<br class="">+; SSE41: pmovzxbw (%rdi), %xmm0<br class="">+; AVX:  vpmovzxbw (%rdi), %xmm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1)<br class="">+  ret <8 x i16> %2<br class="">+}<br class="">+<br class="">+define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbd<br class="">+; SSE41: pmovzxbd (%rdi), %xmm0<br class="">+; AVX:  vpmovzxbd (%rdi), %xmm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1)<br class="">+  ret <4 x i32> %2<br class="">+}<br class="">+<br class="">+define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbq<br class="">+; SSE41: pmovzxbq (%rdi), %xmm0<br class="">+; AVX:  vpmovzxbq (%rdi), %xmm0<br class="">+  %1 = load <16 x i8>* %a, align 1<br class="">+  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1)<br class="">+  ret <2 x i64> %2<br class="">+}<br class="">+<br class="">+define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwd<br class="">+; SSE41: pmovzxwd (%rdi), %xmm0<br class="">+; AVX:  vpmovzxwd (%rdi), %xmm0<br class="">+  %1 = load <8 x i16>* %a, align 1<br class="">+  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1)<br class="">+  ret <4 x i32> %2<br class="">+}<br class="">+<br class="">+define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwq<br class="">+; SSE41: pmovzxwq (%rdi), %xmm0<br class="">+; AVX:  vpmovzxwq (%rdi), %xmm0<br class="">+  %1 = load <8 x i16>* %a, align 1<br class="">+  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1)<br class="">+  ret <2 x i64> %2<br class="">+}<br class="">+<br class="">+define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {<br class="">+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxdq<br class="">+; SSE41: pmovzxdq (%rdi), %xmm0<br class="">+; AVX:  vpmovzxdq (%rdi), %xmm0<br class="">+  %1 = load <4 x i32>* %a, align 1<br class="">+  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1)<br class="">+  ret <2 x i64> %2<br class="">+}<br class="">+<br class="">+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>)<br class="">+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>)<br class="">+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>)<br class="">+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>)<br class="">+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>)<br class="">+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>)<br class="">+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>)<br class="">+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>)<br class="">+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>)<br class="">+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>)<br class="">+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>)<br class="">+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>)<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=223567&r1=223566&r2=223567&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=223567&r1=223566&r2=223567&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Fri Dec  5 19:31:07 2014<br class="">@@ -567,8 +567,7 @@ define <16 x i16> @sext_16i8_to_16i16(<1<br class=""> ;<br class=""> ; AVX2-LABEL: sext_16i8_to_16i16:<br class=""> ; AVX2:       # BB#0: # %entry<br class="">-; AVX2-NEXT:    vmovdqa (%rdi), %xmm0<br class="">-; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0<br class="">+; AVX2-NEXT:    vpmovsxbw (%rdi), %ymm0<br class=""> ; AVX2-NEXT:    retq<br class=""> ;<br class=""> ; X32-SSE41-LABEL: sext_16i8_to_16i16:<br class=""><br class="">Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll<br class="">URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=223567&r1=223566&r2=223567&view=diff" class="">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=223567&r1=223566&r2=223567&view=diff</a><br class="">==============================================================================<br class="">--- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)<br class="">+++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Fri Dec  5 19:31:07 2014<br class="">@@ -204,3 +204,157 @@ entry:<br class="">   %t = zext <16 x i8> %z to <16 x i16><br class="">   ret <16 x i16> %t<br class=""> }<br class="">+<br class="">+define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {<br class="">+; SSE2-LABEL: load_zext_16i8_to_16i16:<br class="">+; SSE2:        # BB#0: # %entry<br class="">+; SSE2-NEXT:   movdqa        (%rdi), %xmm1<br class="">+; SSE2-NEXT:   movdqa        %xmm1, %xmm0<br class="">+; SSE2-NEXT:   punpcklbw     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]<br class="">+; SSE2-NEXT:   movdqa        {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]<br class="">+; SSE2-NEXT:   pand  %xmm2, %xmm0<br class="">+; SSE2-NEXT:   punpckhbw     %xmm1, %xmm1    # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]<br class="">+; SSE2-NEXT:   pand  %xmm2, %xmm1<br class="">+; SSE2-NEXT:   retq<br class="">+<br class="">+; SSSE3-LABEL: load_zext_16i8_to_16i16:<br class="">+; SSSE3:        # BB#0: # %entry<br class="">+; SSSE3-NEXT:   movdqa        (%rdi), %xmm1<br class="">+; SSSE3-NEXT:   movdqa        %xmm1, %xmm0<br class="">+; SSSE3-NEXT:   punpcklbw     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]<br class="">+; SSSE3-NEXT:   movdqa        {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]<br class="">+; SSSE3-NEXT:   pand  %xmm2, %xmm0<br class="">+; SSSE3-NEXT:   punpckhbw     %xmm1, %xmm1    # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]<br class="">+; SSSE3-NEXT:   pand  %xmm2, %xmm1<br class="">+; SSSE3-NEXT:   retq<br class="">+<br class="">+; SSE41-LABEL: load_zext_16i8_to_16i16:<br class="">+; SSE41:        # BB#0: # %entry<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">     </span>movdqa<span class="Apple-tab-span" style="white-space:pre">      </span>(%rdi), %xmm1<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">   </span>pmovzxbw<span class="Apple-tab-span" style="white-space:pre">    </span>%xmm1, %xmm0<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>movdqa<span class="Apple-tab-span" style="white-space:pre">      </span>{{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">       </span>pand<span class="Apple-tab-span" style="white-space:pre">        </span>%xmm2, %xmm0<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>punpckhbw<span class="Apple-tab-span" style="white-space:pre">   </span>%xmm1, %xmm1    # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">       </span>pand<span class="Apple-tab-span" style="white-space:pre">        </span>%xmm2, %xmm1<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>retq<br class="">+<br class="">+; AVX1-LABEL: load_zext_16i8_to_16i16:<br class="">+; AVX1:        # BB#0: # %entry<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">   </span>vmovdqa<span class="Apple-tab-span" style="white-space:pre">     </span>(%rdi), %xmm0<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>vpxor<span class="Apple-tab-span" style="white-space:pre">       </span>%xmm1, %xmm1, %xmm1<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">      </span>vpunpckhbw<span class="Apple-tab-span" style="white-space:pre">  </span>%xmm1, %xmm0, %xmm1 # xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre"> </span>vpmovzxbw<span class="Apple-tab-span" style="white-space:pre">   </span>%xmm0, %xmm0<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">     </span>vinsertf128<span class="Apple-tab-span" style="white-space:pre"> </span>$1, %xmm1, %ymm0, %ymm0<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">  </span>retq<br class="">+<br class="">+; AVX2-LABEL: load_zext_16i8_to_16i16:<br class="">+; AVX2:        # BB#0: # %entry<br class="">+; AVX2-NEXT: <span class="Apple-tab-span" style="white-space:pre">   </span>vpmovzxbw<span class="Apple-tab-span" style="white-space:pre">   </span>(%rdi), %ymm0<br class="">+; AVX2-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>retq<br class="">+entry:<br class="">+ %X = load <16 x i8>* %ptr<br class="">+ %Y = zext <16 x i8> %X to <16 x i16><br class="">+ ret <16 x i16> %Y<br class="">+}<br class="">+<br class="">+define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {<br class="">+; SSE2-LABEL: load_zext_8i16_to_8i32:<br class="">+; SSE2:          # BB#0: # %entry<br class="">+; SSE2-NEXT:   movdqa        (%rdi), %xmm1<br class="">+; SSE2-NEXT:   movdqa        %xmm1, %xmm0<br class="">+; SSE2-NEXT:   punpcklwd     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3]<br class="">+; SSE2-NEXT:   movdqa        {{.*#+}} xmm2 = [65535,65535,65535,65535]<br class="">+; SSE2-NEXT:   pand  %xmm2, %xmm0<br class="">+; SSE2-NEXT:   punpckhwd     %xmm1, %xmm1    # xmm1 = xmm1[4,4,5,5,6,6,7,7]<br class="">+; SSE2-NEXT:   pand  %xmm2, %xmm1<br class="">+; SSE2-NEXT:   retq<br class="">+<br class="">+; SSSE3-LABEL: load_zext_8i16_to_8i32:<br class="">+; SSSE3:        # BB#0: # %entry<br class="">+; SSSE3-NEXT:   movdqa        (%rdi), %xmm1<br class="">+; SSSE3-NEXT:   movdqa        %xmm1, %xmm0<br class="">+; SSSE3-NEXT:   punpcklwd     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3]<br class="">+; SSSE3-NEXT:   movdqa        {{.*#+}} xmm2 = [65535,65535,65535,65535]<br class="">+; SSSE3-NEXT:   pand  %xmm2, %xmm0<br class="">+; SSSE3-NEXT:   punpckhwd     %xmm1, %xmm1    # xmm1 = xmm1[4,4,5,5,6,6,7,7]<br class="">+; SSSE3-NEXT:   pand  %xmm2, %xmm1<br class="">+; SSSE3-NEXT:   retq<br class="">+<br class="">+; SSE41-LABEL: load_zext_8i16_to_8i32:<br class="">+; SSE41:        # BB#0: # %entry<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>movdqa<span class="Apple-tab-span" style="white-space:pre">      </span>(%rdi), %xmm1<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">   </span>pmovzxwd<span class="Apple-tab-span" style="white-space:pre">    </span>%xmm1, %xmm0<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>movdqa<span class="Apple-tab-span" style="white-space:pre">      </span>{{.*#+}} xmm2 = [65535,65535,65535,65535]<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">       </span>pand<span class="Apple-tab-span" style="white-space:pre">        </span>%xmm2, %xmm0<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>punpckhwd<span class="Apple-tab-span" style="white-space:pre">   </span>%xmm1, %xmm1    # xmm1 = xmm1[4,4,5,5,6,6,7,7]<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">   </span>pand<span class="Apple-tab-span" style="white-space:pre">        </span>%xmm2, %xmm1<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>retq<br class="">+<br class="">+; AVX1-LABEL: load_zext_8i16_to_8i32:<br class="">+; AVX1:        # BB#0: # %entry<br class="">+; AVX1-NEXT:    vmovdqa       (%rdi), %xmm0<br class="">+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1<br class="">+; AVX1-NEXT:    vpunpckhwd    %xmm1, %xmm0, %xmm1 # xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]<br class="">+; AVX1-NEXT:    vpmovzxwd     %xmm0, %xmm0<br class="">+; AVX1-NEXT:    vinsertf128   $1, %xmm1, %ymm0, %ymm0<br class="">+; AVX1-NEXT:    retq<br class="">+<br class="">+; AVX2-LABEL: load_zext_8i16_to_8i32:<br class="">+; AVX2:        # BB#0: # %entry<br class="">+; AVX2-NEXT: <span class="Apple-tab-span" style="white-space:pre"> </span>vpmovzxwd<span class="Apple-tab-span" style="white-space:pre">   </span>(%rdi), %ymm0<br class="">+; AVX2-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>retq<br class="">+entry:<br class="">+ %X = load <8 x i16>* %ptr<br class="">+ %Y = zext <8 x i16> %X to <8 x i32><br class="">+ ret <8 x i32>%Y<br class="">+}<br class="">+<br class="">+define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {<br class="">+; SSE2-LABEL: load_zext_4i32_to_4i64:<br class="">+; SSE2:          # BB#0: # %entry<br class="">+; SSE2-NEXT:   movdqa        (%rdi), %xmm1<br class="">+; SSE2-NEXT:   pshufd        $-44, %xmm1, %xmm0      # xmm0 = xmm1[0,1,1,3]<br class="">+; SSE2-NEXT:   movdqa        {{.*#+}} xmm2 = [4294967295,4294967295]<br class="">+; SSE2-NEXT:   pand  %xmm2, %xmm0<br class="">+; SSE2-NEXT:   pshufd        $-6, %xmm1, %xmm1       # xmm1 = xmm1[2,2,3,3]<br class="">+; SSE2-NEXT:   pand  %xmm2, %xmm1<br class="">+; SSE2-NEXT:   retq<br class="">+<br class="">+; SSSE3-LABEL: load_zext_4i32_to_4i64:<br class="">+; SSSE3:        # BB#0: # %entry<br class="">+; SSSE3-NEXT:   movdqa        (%rdi), %xmm1<br class="">+; SSSE3-NEXT:   pshufd        $-44, %xmm1, %xmm0      # xmm0 = xmm1[0,1,1,3]<br class="">+; SSSE3-NEXT:   movdqa        {{.*#+}} xmm2 = [4294967295,4294967295]<br class="">+; SSSE3-NEXT:   pand  %xmm2, %xmm0<br class="">+; SSSE3-NEXT:   pshufd        $-6, %xmm1, %xmm1       # xmm1 = xmm1[2,2,3,3]<br class="">+; SSSE3-NEXT:   pand  %xmm2, %xmm1<br class="">+; SSSE3-NEXT:   retq<br class="">+<br class="">+; SSE41-LABEL: load_zext_4i32_to_4i64:<br class="">+; SSE41:        # BB#0: # %entry<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">      </span>movdqa<span class="Apple-tab-span" style="white-space:pre">      </span>(%rdi), %xmm1<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">   </span>pmovzxdq<span class="Apple-tab-span" style="white-space:pre">    </span>%xmm1, %xmm0<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>movdqa<span class="Apple-tab-span" style="white-space:pre">      </span>{{.*#+}} xmm2 = [4294967295,4294967295]<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre"> </span>pand<span class="Apple-tab-span" style="white-space:pre">        </span>%xmm2, %xmm0<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>pshufd<span class="Apple-tab-span" style="white-space:pre">      </span>$-6, %xmm1, %xmm1       # xmm1 = xmm1[2,2,3,3]<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>pand<span class="Apple-tab-span" style="white-space:pre">        </span>%xmm2, %xmm1<br class="">+; SSE41-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>retq<br class="">+<br class="">+; AVX1-LABEL: load_zext_4i32_to_4i64:<br class="">+; AVX1:        # BB#0: # %entry<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>vmovdqa<span class="Apple-tab-span" style="white-space:pre">     </span>(%rdi), %xmm0<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>vpxor<span class="Apple-tab-span" style="white-space:pre">       </span>%xmm1, %xmm1, %xmm1<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">      </span>vpunpckhdq<span class="Apple-tab-span" style="white-space:pre">  </span>%xmm1, %xmm0, %xmm1 # xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">     </span>vpmovzxdq<span class="Apple-tab-span" style="white-space:pre">   </span>%xmm0, %xmm0<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">     </span>vinsertf128<span class="Apple-tab-span" style="white-space:pre"> </span>$1, %xmm1, %ymm0, %ymm0<br class="">+; AVX1-NEXT: <span class="Apple-tab-span" style="white-space:pre">  </span>retq<br class="">+<br class="">+; AVX2-LABEL: load_zext_4i32_to_4i64:<br class="">+; AVX2:        # BB#0: # %entry<br class="">+; AVX2-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>vpmovzxdq<span class="Apple-tab-span" style="white-space:pre">   </span>(%rdi), %ymm0<br class="">+; AVX2-NEXT: <span class="Apple-tab-span" style="white-space:pre">    </span>retq<br class="">+entry:<br class="">+ %X = load <4 x i32>* %ptr<br class="">+ %Y = zext <4 x i32> %X to <4 x i64><br class="">+ ret <4 x i64>%Y<br class="">+}<br class=""><br class=""><br class="">_______________________________________________<br class="">llvm-commits mailing list<br class=""><a href="mailto:llvm-commits@cs.uiuc.edu" class="">llvm-commits@cs.uiuc.edu</a><br class="">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits<br class=""></div></blockquote></div><br class=""></div></body></html>