[llvm] r223567 - [X86] Refactor PMOV[SZ]Xrm to add missing AVX2 patterns.

Quentin Colombet qcolombet at apple.com
Thu Dec 11 11:23:09 PST 2014


Hi Ahmed,

Thanks for the quick fix.

On Dec 10, 2014, at 8:33 PM, Ahmed Bougacha <ahmed.bougacha at gmail.com> wrote:

> Ah, good catch! Those patterns were there originally, but I removed
> them before committing
> (http://reviews.llvm.org/D6125?id=15839#inline-51408).
> We thought that we couldn't get to v32i8, but I didn't think about
> other originators of VZEXT nodes - i.e., the shuffle lowering!
> 
> I don't think we can write a testcase (from zext), so I'll let you
> commit a test if you have a clean one (I'm not a fan of the
> shuffle-lowering flag, but if that's the only way to get to it I guess
> that's fine?)

I don't have anything else at hand, but I think we should commit what we have, even if we remove it when we remove the old vector shuffle lowering.
I'll do that this afternoon, unless you've beaten me to it :).

Thanks,
-Quentin

> 
> Anyway, r223996 should fix it.
> 
> Sorry about the breakage!
> - Ahmed
> 
> 
> On Wed, Dec 10, 2014 at 6:03 PM, Quentin Colombet <qcolombet at apple.com> wrote:
>> Hi Ahmed,
>> 
>> I've bisected an instruction selection failure with the old vector shuffle
>> to this commit: http://llvm.org/bugs/show_bug.cgi?id=21876
>> 
>> Would you mind having a look?
>> 
>> Thanks,
>> -Quentin
>> 
>> On Dec 5, 2014, at 5:31 PM, Ahmed Bougacha <ahmed.bougacha at gmail.com> wrote:
>> 
>> Author: ab
>> Date: Fri Dec  5 19:31:07 2014
>> New Revision: 223567
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=223567&view=rev
>> Log:
>> [X86] Refactor PMOV[SZ]Xrm to add missing AVX2 patterns.
>> 
>> Most patterns will go away once the extload legalization changes land.
>> 
>> Differential Revision: http://reviews.llvm.org/D6125
>> 
>> Added:
>>   llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
>>   llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
>> Modified:
>>   llvm/trunk/lib/Target/X86/X86InstrSSE.td
>>   llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>>   llvm/trunk/test/CodeGen/X86/vector-sext.ll
>>   llvm/trunk/test/CodeGen/X86/vector-zext.ll
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=223567&r1=223566&r2=223567&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Dec  5 19:31:07 2014
>> @@ -6094,552 +6094,223 @@ def : InstAlias<"monitor\t{%rax, %rcx, %
>> // SSE4.1 - Packed Move with Sign/Zero Extend
>> //===----------------------------------------------------------------------===//
>> 
>> -multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic
>> IntId,
>> -                               OpndItins itins = DEFAULT_ITINS> {
>> -  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
>> +multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand
>> MemOp,
>> +                          RegisterClass OutRC, RegisterClass InRC,
>> +                          OpndItins itins> {
>> +  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
>>                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -                 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>,
>> +                 [], itins.rr>,
>>                 Sched<[itins.Sched]>;
>> 
>> -  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
>> +  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
>>                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -       [(set VR128:$dst,
>> -         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64
>> addr:$src))))))],
>> -         itins.rm>, Sched<[itins.Sched.Folded]>;
>> +                 [],
>> +                 itins.rm>, Sched<[itins.Sched.Folded]>;
>> }
>> 
>> -multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
>> -                                 Intrinsic IntId, X86FoldableSchedWrite
>> Sched> {
>> -  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
>> -                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -                  [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;
>> -
>> -  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
>> -                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -                  [(set VR256:$dst, (IntId (load addr:$src)))]>,
>> -                  Sched<[Sched.Folded]>;
>> +multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
>> +                          X86MemOperand MemOp, X86MemOperand MemYOp,
>> +                          OpndItins SSEItins, OpndItins AVXItins,
>> +                          OpndItins AVX2Itins> {
>> +  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128,
>> SSEItins>;
>> +  let Predicates = [HasAVX] in
>> +    defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr),
>> MemOp,
>> +                                     VR128, VR128, AVXItins>, VEX;
>> +  let Predicates = [HasAVX2] in
>> +    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr),
>> MemYOp,
>> +                                     VR256, VR128, AVX2Itins>, VEX, VEX_L;
>> +}
>> +
>> +multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr,
>> +                                X86MemOperand MemOp, X86MemOperand MemYOp>
>> {
>> +  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx",
>> OpcodeStr),
>> +                                        MemOp, MemYOp,
>> +                                        SSE_INTALU_ITINS_SHUFF_P,
>> +                                        DEFAULT_ITINS_SHUFFLESCHED,
>> +                                        DEFAULT_ITINS_SHUFFLESCHED>;
>> +  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
>> +                                        !strconcat("pmovzx", OpcodeStr),
>> +                                        MemOp, MemYOp,
>> +                                        SSE_INTALU_ITINS_SHUFF_P,
>> +                                        DEFAULT_ITINS_SHUFFLESCHED,
>> +                                        DEFAULT_ITINS_SHUFFLESCHED>;
>> +}
>> +
>> +defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem>;
>> +defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem>;
>> +defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem>;
>> +
>> +defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem>;
>> +defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem>;
>> +
>> +defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;
>> +
>> +// AVX2 Patterns
>> +multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
>> +  // Register-Register patterns
>> +  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
>> +  def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
>> +
>> +  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (v8i16 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
>> +
>> +  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
>> +
>> +  // AVX2 Register-Memory patterns
>> +  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
>> +  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
>> +  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
>> +  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
>> +
>> +  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64
>> addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
>> +  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
>> +  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
>> +  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
>> +
>> +  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32
>> addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
>> +
>> +  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
>> +  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
>> +  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
>> +  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
>> +
>> +  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64
>> addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
>> +
>> +  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
>> +  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
>> }
>> 
>> -let Predicates = [HasAVX] in {
>> -defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw",
>> -                                     int_x86_sse41_pmovsxbw,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd",
>> -                                     int_x86_sse41_pmovsxwd,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq",
>> -                                     int_x86_sse41_pmovsxdq,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw",
>> -                                     int_x86_sse41_pmovzxbw,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd",
>> -                                     int_x86_sse41_pmovzxwd,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq",
>> -                                     int_x86_sse41_pmovzxdq,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -}
>> -
>> -let Predicates = [HasAVX2] in {
>> -defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
>> -                                        int_x86_avx2_pmovsxbw,
>> -                                        WriteShuffle>, VEX, VEX_L;
>> -defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
>> -                                        int_x86_avx2_pmovsxwd,
>> -                                        WriteShuffle>, VEX, VEX_L;
>> -defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
>> -                                        int_x86_avx2_pmovsxdq,
>> -                                        WriteShuffle>, VEX, VEX_L;
>> -defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
>> -                                        int_x86_avx2_pmovzxbw,
>> -                                        WriteShuffle>, VEX, VEX_L;
>> -defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
>> -                                        int_x86_avx2_pmovzxwd,
>> -                                        WriteShuffle>, VEX, VEX_L;
>> -defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
>> -                                        int_x86_avx2_pmovzxdq,
>> -                                        WriteShuffle>, VEX, VEX_L;
>> -}
>> -
>> -defm PMOVSXBW   : SS41I_binop_rm_int8<0x20, "pmovsxbw",
>> int_x86_sse41_pmovsxbw,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -defm PMOVSXWD   : SS41I_binop_rm_int8<0x23, "pmovsxwd",
>> int_x86_sse41_pmovsxwd,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -defm PMOVSXDQ   : SS41I_binop_rm_int8<0x25, "pmovsxdq",
>> int_x86_sse41_pmovsxdq,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -defm PMOVZXBW   : SS41I_binop_rm_int8<0x30, "pmovzxbw",
>> int_x86_sse41_pmovzxbw,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -defm PMOVZXWD   : SS41I_binop_rm_int8<0x33, "pmovzxwd",
>> int_x86_sse41_pmovzxwd,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -defm PMOVZXDQ   : SS41I_binop_rm_int8<0x35, "pmovzxdq",
>> int_x86_sse41_pmovzxdq,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -
>> -let Predicates = [HasAVX] in {
>> -  // Common patterns involving scalar load.
>> -  def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
>> -            (VPMOVSXBWrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
>> -            (VPMOVSXBWrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
>> -            (VPMOVSXBWrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
>> -            (VPMOVSXWDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
>> -            (VPMOVSXWDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
>> -            (VPMOVSXWDrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
>> -            (VPMOVSXDQrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
>> -            (VPMOVSXDQrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
>> -            (VPMOVSXDQrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
>> -            (VPMOVZXBWrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
>> -            (VPMOVZXBWrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
>> -            (VPMOVZXBWrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
>> -            (VPMOVZXWDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
>> -            (VPMOVZXWDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
>> -            (VPMOVZXWDrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
>> -            (VPMOVZXDQrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
>> -            (VPMOVZXDQrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
>> -            (VPMOVZXDQrm addr:$src)>;
>> -}
>> -
>> -let Predicates = [UseSSE41] in {
>> -  // Common patterns involving scalar load.
>> -  def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
>> -            (PMOVSXBWrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
>> -            (PMOVSXBWrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))),
>> -            (PMOVSXBWrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
>> -            (PMOVSXWDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
>> -            (PMOVSXWDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))),
>> -            (PMOVSXWDrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
>> -            (PMOVSXDQrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
>> -            (PMOVSXDQrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))),
>> -            (PMOVSXDQrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
>> -            (PMOVZXBWrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
>> -            (PMOVZXBWrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))),
>> -            (PMOVZXBWrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
>> -            (PMOVZXWDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
>> -            (PMOVZXWDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))),
>> -            (PMOVZXWDrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
>> -            (PMOVZXDQrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
>> -            (PMOVZXDQrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))),
>> -            (PMOVZXDQrm addr:$src)>;
>> -}
>> -
>> -multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic
>> IntId,
>> -                               OpndItins itins = DEFAULT_ITINS> {
>> -  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
>> -                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -                 [(set VR128:$dst, (IntId VR128:$src))], itins.rr>,
>> -                 Sched<[itins.Sched]>;
>> -
>> -  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
>> -                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -       [(set VR128:$dst,
>> -         (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32
>> addr:$src))))))],
>> -         itins.rm>, Sched<[itins.Sched.Folded]>;
>> -}
>> -
>> -multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr,
>> -                                 Intrinsic IntId, X86FoldableSchedWrite
>> Sched> {
>> -  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
>> -                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -                  [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;
>> -
>> -  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src),
>> -                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -       [(set VR256:$dst,
>> -         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64
>> addr:$src))))))]>,
>> -         Sched<[Sched.Folded]>;
>> -}
>> -
>> -let Predicates = [HasAVX] in {
>> -defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd",
>> int_x86_sse41_pmovsxbd,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq",
>> int_x86_sse41_pmovsxwq,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd",
>> int_x86_sse41_pmovzxbd,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq",
>> int_x86_sse41_pmovzxwq,
>> -                                     DEFAULT_ITINS_SHUFFLESCHED>, VEX;
>> -}
>> -
>> -let Predicates = [HasAVX2] in {
>> -defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
>> -                                       int_x86_avx2_pmovsxbd,
>> WriteShuffle>,
>> -                                       VEX, VEX_L;
>> -defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
>> -                                       int_x86_avx2_pmovsxwq,
>> WriteShuffle>,
>> -                                       VEX, VEX_L;
>> -defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
>> -                                       int_x86_avx2_pmovzxbd,
>> WriteShuffle>,
>> -                                       VEX, VEX_L;
>> -defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
>> -                                       int_x86_avx2_pmovzxwq,
>> WriteShuffle>,
>> -                                       VEX, VEX_L;
>> -}
>> -
>> -defm PMOVSXBD   : SS41I_binop_rm_int4<0x21, "pmovsxbd",
>> int_x86_sse41_pmovsxbd,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -defm PMOVSXWQ   : SS41I_binop_rm_int4<0x24, "pmovsxwq",
>> int_x86_sse41_pmovsxwq,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -defm PMOVZXBD   : SS41I_binop_rm_int4<0x31, "pmovzxbd",
>> int_x86_sse41_pmovzxbd,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -defm PMOVZXWQ   : SS41I_binop_rm_int4<0x34, "pmovzxwq",
>> int_x86_sse41_pmovzxwq,
>> -                                      SSE_INTALU_ITINS_SHUFF_P>;
>> -
>> -let Predicates = [HasAVX] in {
>> -  // Common patterns involving scalar load
>> -  def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
>> -            (VPMOVSXBDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
>> -            (VPMOVSXWQrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
>> -            (VPMOVZXBDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
>> -            (VPMOVZXWQrm addr:$src)>;
>> -}
>> -
>> -let Predicates = [UseSSE41] in {
>> -  // Common patterns involving scalar load
>> -  def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
>> -            (PMOVSXBDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
>> -            (PMOVSXWQrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
>> -            (PMOVZXBDrm addr:$src)>;
>> -  def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
>> -            (PMOVZXWQrm addr:$src)>;
>> -}
>> -
>> -multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic
>> IntId,
>> -                               X86FoldableSchedWrite Sched> {
>> -  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
>> -                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -                 [(set VR128:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;
>> -
>> -  // Expecting a i16 load any extended to i32 value.
>> -  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
>> -                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -                 [(set VR128:$dst, (IntId (bitconvert
>> -                     (v4i32 (scalar_to_vector (loadi16_anyext
>> addr:$src))))))]>,
>> -                 Sched<[Sched.Folded]>;
>> -}
>> -
>> -multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,
>> -                                 Intrinsic IntId, X86FoldableSchedWrite
>> Sched> {
>> -  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
>> -                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -                 [(set VR256:$dst, (IntId VR128:$src))]>, Sched<[Sched]>;
>> -
>> -  // Expecting a i16 load any extended to i32 value.
>> -  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src),
>> -                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>> -                  [(set VR256:$dst, (IntId (bitconvert
>> -                      (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
>> -                 Sched<[Sched.Folded]>;
>> -}
>> -
>> -let Predicates = [HasAVX] in {
>> -defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq",
>> int_x86_sse41_pmovsxbq,
>> -                                     WriteShuffle>, VEX;
>> -defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq",
>> int_x86_sse41_pmovzxbq,
>> -                                     WriteShuffle>, VEX;
>> -}
>> let Predicates = [HasAVX2] in {
>> -defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
>> int_x86_avx2_pmovsxbq,
>> -                                       WriteShuffle>, VEX, VEX_L;
>> -defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
>> int_x86_avx2_pmovzxbq,
>> -                                       WriteShuffle>, VEX, VEX_L;
>> -}
>> -defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq",
>> int_x86_sse41_pmovsxbq,
>> -                                      WriteShuffle>;
>> -defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq",
>> int_x86_sse41_pmovzxbq,
>> -                                      WriteShuffle>;
>> -
>> -let Predicates = [HasAVX2] in {
>> -  def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr
>> VR128:$src)>;
>> -  def : Pat<(v8i32  (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr
>> VR128:$src)>;
>> -  def : Pat<(v4i64  (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v8i32  (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr
>> VR128:$src)>;
>> -  def : Pat<(v4i64  (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v4i64  (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),
>> -            (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -  def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),
>> -            (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -  def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),
>> -            (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -
>> -  def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),
>> -            (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -  def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),
>> -            (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -
>> -  def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
>> -            (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -
>> -  def : Pat<(v8i32 (X86vsext (v8i16 (bitconvert (v2i64 (load
>> addr:$src)))))),
>> -            (VPMOVSXWDYrm addr:$src)>;
>> -  def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load
>> addr:$src)))))),
>> -            (VPMOVSXDQYrm addr:$src)>;
>> -
>> -  def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
>> -                    (scalar_to_vector (loadi64 addr:$src))))))),
>> -            (VPMOVSXBDYrm addr:$src)>;
>> -  def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
>> -                    (scalar_to_vector (loadf64 addr:$src))))))),
>> -            (VPMOVSXBDYrm addr:$src)>;
>> -
>> -  def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
>> -                    (scalar_to_vector (loadi64 addr:$src))))))),
>> -            (VPMOVSXWQYrm addr:$src)>;
>> -  def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
>> -                    (scalar_to_vector (loadf64 addr:$src))))))),
>> -            (VPMOVSXWQYrm addr:$src)>;
>> -
>> -  def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
>> -                    (scalar_to_vector (loadi32 addr:$src))))))),
>> -            (VPMOVSXBQYrm addr:$src)>;
>> -}
>> -
>> -let Predicates = [HasAVX] in {
>> -  // Common patterns involving scalar load
>> -  def : Pat<(int_x86_sse41_pmovsxbq
>> -              (bitconvert (v4i32 (X86vzmovl
>> -                            (v4i32 (scalar_to_vector (loadi32
>> addr:$src))))))),
>> -            (VPMOVSXBQrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxbq
>> -              (bitconvert (v4i32 (X86vzmovl
>> -                            (v4i32 (scalar_to_vector (loadi32
>> addr:$src))))))),
>> -            (VPMOVZXBQrm addr:$src)>;
>> +  defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", X86vsext>;
>> +  defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", X86vzext>;
>> }
>> 
>> -let Predicates = [UseSSE41] in {
>> -  def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr
>> VR128:$src)>;
>> -  def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr
>> VR128:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr
>> VR128:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr
>> VR128:$src)>;
>> -
>> -  // Common patterns involving scalar load
>> -  def : Pat<(int_x86_sse41_pmovsxbq
>> -              (bitconvert (v4i32 (X86vzmovl
>> -                            (v4i32 (scalar_to_vector (loadi32
>> addr:$src))))))),
>> -            (PMOVSXBQrm addr:$src)>;
>> -
>> -  def : Pat<(int_x86_sse41_pmovzxbq
>> -              (bitconvert (v4i32 (X86vzmovl
>> -                            (v4i32 (scalar_to_vector (loadi32
>> addr:$src))))))),
>> -            (PMOVZXBQrm addr:$src)>;
>> -
>> -  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
>> -                    (scalar_to_vector (loadi64 addr:$src))))))),
>> -            (PMOVSXWDrm addr:$src)>;
>> -  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
>> -                    (scalar_to_vector (loadf64 addr:$src))))))),
>> -            (PMOVSXWDrm addr:$src)>;
>> -  def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
>> -                    (scalar_to_vector (loadi32 addr:$src))))))),
>> -            (PMOVSXBDrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
>> -                    (scalar_to_vector (loadi32 addr:$src))))))),
>> -            (PMOVSXWQrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
>> -                    (scalar_to_vector (extloadi32i16 addr:$src))))))),
>> -            (PMOVSXBQrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
>> -                    (scalar_to_vector (loadi64 addr:$src))))))),
>> -            (PMOVSXDQrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
>> -                    (scalar_to_vector (loadf64 addr:$src))))))),
>> -            (PMOVSXDQrm addr:$src)>;
>> -  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
>> -                    (scalar_to_vector (loadi64 addr:$src))))))),
>> -            (PMOVSXBWrm addr:$src)>;
>> -  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
>> -                    (scalar_to_vector (loadf64 addr:$src))))))),
>> -            (PMOVSXBWrm addr:$src)>;
>> -}
>> -
>> -let Predicates = [HasAVX2] in {
>> -  def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr
>> VR128:$src)>;
>> -  def : Pat<(v8i32  (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr
>> VR128:$src)>;
>> -  def : Pat<(v4i64  (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v8i32  (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr
>> VR128:$src)>;
>> -  def : Pat<(v4i64  (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v4i64  (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))),
>> -            (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -  def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))),
>> -            (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -  def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))),
>> -            (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -
>> -  def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))),
>> -            (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -  def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))),
>> -            (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> -
>> -  def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))),
>> -            (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
>> +// SSE4.1/AVX patterns.
>> +multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
>> +                                PatFrag ExtLoad16> {
>> +  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
>> +  def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;
>> +
>> +  def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;
>> +
>> +  def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
>> +            (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
>> +
>> +  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
>> +  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
>> +  def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
>> +  def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
>> +  def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
>> +
>> +  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
>> +  def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
>> +  def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
>> +  def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
>> +
>> +  def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
>> +
>> +  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
>> +  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
>> +  def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
>> +  def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
>> +  def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
>> +
>> +  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
>> +
>> +  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
>> +            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
>> +  def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
>> +            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
>> }
>> 
>> let Predicates = [HasAVX] in {
>> -  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr
>> VR128:$src)>;
>> -  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr
>> VR128:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr
>> VR128:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector
>> (loadi64 addr:$src))))))),
>> -            (VPMOVZXBWrm addr:$src)>;
>> -  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector
>> (loadf64 addr:$src))))))),
>> -            (VPMOVZXBWrm addr:$src)>;
>> -  def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector
>> (loadi32 addr:$src))))))),
>> -            (VPMOVZXBDrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector
>> (loadi16_anyext addr:$src))))))),
>> -            (VPMOVZXBQrm addr:$src)>;
>> -
>> -  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector
>> (loadi64 addr:$src))))))),
>> -            (VPMOVZXWDrm addr:$src)>;
>> -  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector
>> (loadf64 addr:$src))))))),
>> -            (VPMOVZXWDrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector
>> (loadi32 addr:$src))))))),
>> -            (VPMOVZXWQrm addr:$src)>;
>> -
>> -  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector
>> (loadi64 addr:$src))))))),
>> -            (VPMOVZXDQrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector
>> (loadf64 addr:$src))))))),
>> -            (VPMOVZXDQrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload
>> addr:$src)))))),
>> -            (VPMOVZXDQrm addr:$src)>;
>> -
>> -  def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr
>> VR128:$src)>;
>> -  def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr
>> VR128:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr
>> VR128:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
>> -                    (scalar_to_vector (loadi64 addr:$src))))))),
>> -            (VPMOVSXWDrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
>> -                    (scalar_to_vector (loadi64 addr:$src))))))),
>> -            (VPMOVSXDQrm addr:$src)>;
>> -  def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
>> -                    (scalar_to_vector (loadf64 addr:$src))))))),
>> -            (VPMOVSXWDrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
>> -                    (scalar_to_vector (loadf64 addr:$src))))))),
>> -            (VPMOVSXDQrm addr:$src)>;
>> -  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
>> -                    (scalar_to_vector (loadi64 addr:$src))))))),
>> -            (VPMOVSXBWrm addr:$src)>;
>> -  def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
>> -                    (scalar_to_vector (loadf64 addr:$src))))))),
>> -            (VPMOVSXBWrm addr:$src)>;
>> -
>> -  def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
>> -                    (scalar_to_vector (loadi32 addr:$src))))))),
>> -            (VPMOVSXBDrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
>> -                    (scalar_to_vector (loadi32 addr:$src))))))),
>> -            (VPMOVSXWQrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
>> -                    (scalar_to_vector (extloadi32i16 addr:$src))))))),
>> -            (VPMOVSXBQrm addr:$src)>;
>> +  defm : SS41I_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;
>> +  defm : SS41I_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;
>> }
>> 
>> let Predicates = [UseSSE41] in {
>> -  def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr
>> VR128:$src)>;
>> -  def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (PMOVZXBDrr
>> VR128:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr
>> VR128:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr
>> VR128:$src)>;
>> -
>> -  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector
>> (loadi64 addr:$src))))))),
>> -            (PMOVZXBWrm addr:$src)>;
>> -  def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector
>> (loadf64 addr:$src))))))),
>> -            (PMOVZXBWrm addr:$src)>;
>> -  def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector
>> (loadi32 addr:$src))))))),
>> -            (PMOVZXBDrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector
>> (loadi16_anyext addr:$src))))))),
>> -            (PMOVZXBQrm addr:$src)>;
>> -
>> -  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector
>> (loadi64 addr:$src))))))),
>> -            (PMOVZXWDrm addr:$src)>;
>> -  def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector
>> (loadf64 addr:$src))))))),
>> -            (PMOVZXWDrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector
>> (loadi32 addr:$src))))))),
>> -            (PMOVZXWQrm addr:$src)>;
>> -
>> -  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector
>> (loadi64 addr:$src))))))),
>> -            (PMOVZXDQrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector
>> (loadf64 addr:$src))))))),
>> -            (PMOVZXDQrm addr:$src)>;
>> -  def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload
>> addr:$src)))))),
>> -            (PMOVZXDQrm addr:$src)>;
>> +  defm : SS41I_pmovx_patterns<"PMOVSX", X86vsext, extloadi32i16>;
>> +  defm : SS41I_pmovx_patterns<"PMOVZX", X86vzext, loadi16_anyext>;
>> }
>> 
>> //===----------------------------------------------------------------------===//
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=223567&r1=223566&r2=223567&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Fri Dec  5 19:31:07 2014
>> @@ -138,6 +138,18 @@ static const IntrinsicData  IntrinsicsWi
>>  X86_INTRINSIC_DATA(avx2_pminu_b,      INTR_TYPE_2OP, X86ISD::UMIN, 0),
>>  X86_INTRINSIC_DATA(avx2_pminu_d,      INTR_TYPE_2OP, X86ISD::UMIN, 0),
>>  X86_INTRINSIC_DATA(avx2_pminu_w,      INTR_TYPE_2OP, X86ISD::UMIN, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovsxbd,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovsxbq,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovsxbw,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovsxdq,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovsxwd,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovsxwq,     INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovzxbd,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovzxbq,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovzxbw,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovzxdq,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovzxwd,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(avx2_pmovzxwq,     INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>>  X86_INTRINSIC_DATA(avx2_psll_d,       INTR_TYPE_2OP, X86ISD::VSHL, 0),
>>  X86_INTRINSIC_DATA(avx2_psll_q,       INTR_TYPE_2OP, X86ISD::VSHL, 0),
>>  X86_INTRINSIC_DATA(avx2_psll_w,       INTR_TYPE_2OP, X86ISD::VSHL, 0),
>> @@ -284,6 +296,18 @@ static const IntrinsicData  IntrinsicsWi
>>  X86_INTRINSIC_DATA(sse41_pminsd,      INTR_TYPE_2OP, X86ISD::SMIN, 0),
>>  X86_INTRINSIC_DATA(sse41_pminud,      INTR_TYPE_2OP, X86ISD::UMIN, 0),
>>  X86_INTRINSIC_DATA(sse41_pminuw,      INTR_TYPE_2OP, X86ISD::UMIN, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovsxbd,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovsxbq,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovsxbw,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovsxdq,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovsxwd,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovsxwq,    INTR_TYPE_1OP, X86ISD::VSEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovzxbd,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovzxbq,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovzxbw,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovzxdq,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovzxwd,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>> +  X86_INTRINSIC_DATA(sse41_pmovzxwq,    INTR_TYPE_1OP, X86ISD::VZEXT, 0),
>>  X86_INTRINSIC_DATA(sse_comieq_ss,     COMI, X86ISD::COMI, ISD::SETEQ),
>>  X86_INTRINSIC_DATA(sse_comige_ss,     COMI, X86ISD::COMI, ISD::SETGE),
>>  X86_INTRINSIC_DATA(sse_comigt_ss,     COMI, X86ISD::COMI, ISD::SETGT),
>> 
>> Added: llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll?rev=223567&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll Fri Dec  5 19:31:07 2014
>> @@ -0,0 +1,110 @@
>> +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s
>> +
>> +define <16 x i16> @test_lvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_lvm_x86_avx2_pmovsxbw
>> +; CHECK: vpmovsxbw (%rdi), %ymm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %1)
>> +  ret <16 x i16> %2
>> +}
>> +
>> +define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbd
>> +; CHECK: vpmovsxbd (%rdi), %ymm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %1)
>> +  ret <8 x i32> %2
>> +}
>> +
>> +define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbq
>> +; CHECK: vpmovsxbq (%rdi), %ymm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %1)
>> +  ret <4 x i64> %2
>> +}
>> +
>> +define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwd
>> +; CHECK: vpmovsxwd (%rdi), %ymm0
>> +  %1 = load <8 x i16>* %a, align 1
>> +  %2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %1)
>> +  ret <8 x i32> %2
>> +}
>> +
>> +define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwq
>> +; CHECK: vpmovsxwq (%rdi), %ymm0
>> +  %1 = load <8 x i16>* %a, align 1
>> +  %2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %1)
>> +  ret <4 x i64> %2
>> +}
>> +
>> +define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovsxdq
>> +; CHECK: vpmovsxdq (%rdi), %ymm0
>> +  %1 = load <4 x i32>* %a, align 1
>> +  %2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %1)
>> +  ret <4 x i64> %2
>> +}
>> +
>> +define <16 x i16> @test_lvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_lvm_x86_avx2_pmovzxbw
>> +; CHECK: vpmovzxbw (%rdi), %ymm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %1)
>> +  ret <16 x i16> %2
>> +}
>> +
>> +define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbd
>> +; CHECK: vpmovzxbd (%rdi), %ymm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %1)
>> +  ret <8 x i32> %2
>> +}
>> +
>> +define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbq
>> +; CHECK: vpmovzxbq (%rdi), %ymm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %1)
>> +  ret <4 x i64> %2
>> +}
>> +
>> +define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwd
>> +; CHECK: vpmovzxwd (%rdi), %ymm0
>> +  %1 = load <8 x i16>* %a, align 1
>> +  %2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %1)
>> +  ret <8 x i32> %2
>> +}
>> +
>> +define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwq
>> +; CHECK: vpmovzxwq (%rdi), %ymm0
>> +  %1 = load <8 x i16>* %a, align 1
>> +  %2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %1)
>> +  ret <4 x i64> %2
>> +}
>> +
>> +define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_avx2_pmovzxdq
>> +; CHECK: vpmovzxdq (%rdi), %ymm0
>> +  %1 = load <4 x i32>* %a, align 1
>> +  %2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %1)
>> +  ret <4 x i64> %2
>> +}
>> +
>> +declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>)
>> +declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>)
>> +declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>)
>> +declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>)
>> +declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>)
>> +declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>)
>> +declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>)
>> +declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>)
>> +declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>)
>> +declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>)
>> +declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>)
>> +declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>)
>> 
>> Added: llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll?rev=223567&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll Fri Dec  5 19:31:07 2014
>> @@ -0,0 +1,123 @@
>> +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
>> +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
>> +
>> +define <8 x i16> @test_llvm_x86_sse41_pmovsxbw(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbw
>> +; SSE41: pmovsxbw (%rdi), %xmm0
>> +; AVX:  vpmovsxbw (%rdi), %xmm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %1)
>> +  ret <8 x i16> %2
>> +}
>> +
>> +define <4 x i32> @test_llvm_x86_sse41_pmovsxbd(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbd
>> +; SSE41: pmovsxbd (%rdi), %xmm0
>> +; AVX:  vpmovsxbd (%rdi), %xmm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %1)
>> +  ret <4 x i32> %2
>> +}
>> +
>> +define <2 x i64> @test_llvm_x86_sse41_pmovsxbq(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbq
>> +; SSE41: pmovsxbq (%rdi), %xmm0
>> +; AVX:  vpmovsxbq (%rdi), %xmm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %1)
>> +  ret <2 x i64> %2
>> +}
>> +
>> +define <4 x i32> @test_llvm_x86_sse41_pmovsxwd(<8 x i16>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwd
>> +; SSE41: pmovsxwd (%rdi), %xmm0
>> +; AVX:  vpmovsxwd (%rdi), %xmm0
>> +  %1 = load <8 x i16>* %a, align 1
>> +  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1)
>> +  ret <4 x i32> %2
>> +}
>> +
>> +define <2 x i64> @test_llvm_x86_sse41_pmovsxwq(<8 x i16>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwq
>> +; SSE41: pmovsxwq (%rdi), %xmm0
>> +; AVX:  vpmovsxwq (%rdi), %xmm0
>> +  %1 = load <8 x i16>* %a, align 1
>> +  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %1)
>> +  ret <2 x i64> %2
>> +}
>> +
>> +define <2 x i64> @test_llvm_x86_sse41_pmovsxdq(<4 x i32>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovsxdq
>> +; SSE41: pmovsxdq (%rdi), %xmm0
>> +; AVX:  vpmovsxdq (%rdi), %xmm0
>> +  %1 = load <4 x i32>* %a, align 1
>> +  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %1)
>> +  ret <2 x i64> %2
>> +}
>> +
>> +define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbw
>> +; SSE41: pmovzxbw (%rdi), %xmm0
>> +; AVX:  vpmovzxbw (%rdi), %xmm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1)
>> +  ret <8 x i16> %2
>> +}
>> +
>> +define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbd
>> +; SSE41: pmovzxbd (%rdi), %xmm0
>> +; AVX:  vpmovzxbd (%rdi), %xmm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1)
>> +  ret <4 x i32> %2
>> +}
>> +
>> +define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbq
>> +; SSE41: pmovzxbq (%rdi), %xmm0
>> +; AVX:  vpmovzxbq (%rdi), %xmm0
>> +  %1 = load <16 x i8>* %a, align 1
>> +  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1)
>> +  ret <2 x i64> %2
>> +}
>> +
>> +define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwd
>> +; SSE41: pmovzxwd (%rdi), %xmm0
>> +; AVX:  vpmovzxwd (%rdi), %xmm0
>> +  %1 = load <8 x i16>* %a, align 1
>> +  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1)
>> +  ret <4 x i32> %2
>> +}
>> +
>> +define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwq
>> +; SSE41: pmovzxwq (%rdi), %xmm0
>> +; AVX:  vpmovzxwq (%rdi), %xmm0
>> +  %1 = load <8 x i16>* %a, align 1
>> +  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1)
>> +  ret <2 x i64> %2
>> +}
>> +
>> +define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
>> +; CHECK-LABEL: test_llvm_x86_sse41_pmovzxdq
>> +; SSE41: pmovzxdq (%rdi), %xmm0
>> +; AVX:  vpmovzxdq (%rdi), %xmm0
>> +  %1 = load <4 x i32>* %a, align 1
>> +  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1)
>> +  ret <2 x i64> %2
>> +}
>> +
>> +declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>)
>> +declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>)
>> +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>)
>> +declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>)
>> +declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>)
>> +declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>)
>> +declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>)
>> +declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>)
>> +declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>)
>> +declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>)
>> +declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>)
>> +declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>)
>> 
>> Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=223567&r1=223566&r2=223567&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Fri Dec  5 19:31:07 2014
>> @@ -567,8 +567,7 @@ define <16 x i16> @sext_16i8_to_16i16(<1
>> ;
>> ; AVX2-LABEL: sext_16i8_to_16i16:
>> ; AVX2:       # BB#0: # %entry
>> -; AVX2-NEXT:    vmovdqa (%rdi), %xmm0
>> -; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm0
>> +; AVX2-NEXT:    vpmovsxbw (%rdi), %ymm0
>> ; AVX2-NEXT:    retq
>> ;
>> ; X32-SSE41-LABEL: sext_16i8_to_16i16:
>> 
>> Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=223567&r1=223566&r2=223567&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Fri Dec  5 19:31:07 2014
>> @@ -204,3 +204,157 @@ entry:
>>  %t = zext <16 x i8> %z to <16 x i16>
>>  ret <16 x i16> %t
>> }
>> +
>> +define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
>> +; SSE2-LABEL: load_zext_16i8_to_16i16:
>> +; SSE2:        # BB#0: # %entry
>> +; SSE2-NEXT:   movdqa        (%rdi), %xmm1
>> +; SSE2-NEXT:   movdqa        %xmm1, %xmm0
>> +; SSE2-NEXT:   punpcklbw     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
>> +; SSE2-NEXT:   movdqa        {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
>> +; SSE2-NEXT:   pand  %xmm2, %xmm0
>> +; SSE2-NEXT:   punpckhbw     %xmm1, %xmm1    # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
>> +; SSE2-NEXT:   pand  %xmm2, %xmm1
>> +; SSE2-NEXT:   retq
>> +
>> +; SSSE3-LABEL: load_zext_16i8_to_16i16:
>> +; SSSE3:        # BB#0: # %entry
>> +; SSSE3-NEXT:   movdqa        (%rdi), %xmm1
>> +; SSSE3-NEXT:   movdqa        %xmm1, %xmm0
>> +; SSSE3-NEXT:   punpcklbw     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
>> +; SSSE3-NEXT:   movdqa        {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
>> +; SSSE3-NEXT:   pand  %xmm2, %xmm0
>> +; SSSE3-NEXT:   punpckhbw     %xmm1, %xmm1    # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
>> +; SSSE3-NEXT:   pand  %xmm2, %xmm1
>> +; SSSE3-NEXT:   retq
>> +
>> +; SSE41-LABEL: load_zext_16i8_to_16i16:
>> +; SSE41:        # BB#0: # %entry
>> +; SSE41-NEXT: movdqa (%rdi), %xmm1
>> +; SSE41-NEXT: pmovzxbw %xmm1, %xmm0
>> +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
>> +; SSE41-NEXT: pand %xmm2, %xmm0
>> +; SSE41-NEXT: punpckhbw %xmm1, %xmm1    # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
>> +; SSE41-NEXT: pand %xmm2, %xmm1
>> +; SSE41-NEXT: retq
>> +
>> +; AVX1-LABEL: load_zext_16i8_to_16i16:
>> +; AVX1:        # BB#0: # %entry
>> +; AVX1-NEXT: vmovdqa (%rdi), %xmm0
>> +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
>> +; AVX1-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm1 # xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
>> +; AVX1-NEXT: vpmovzxbw %xmm0, %xmm0
>> +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
>> +; AVX1-NEXT: retq
>> +
>> +; AVX2-LABEL: load_zext_16i8_to_16i16:
>> +; AVX2:        # BB#0: # %entry
>> +; AVX2-NEXT: vpmovzxbw (%rdi), %ymm0
>> +; AVX2-NEXT: retq
>> +entry:
>> + %X = load <16 x i8>* %ptr
>> + %Y = zext <16 x i8> %X to <16 x i16>
>> + ret <16 x i16> %Y
>> +}
>> +
>> +define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
>> +; SSE2-LABEL: load_zext_8i16_to_8i32:
>> +; SSE2:          # BB#0: # %entry
>> +; SSE2-NEXT:   movdqa        (%rdi), %xmm1
>> +; SSE2-NEXT:   movdqa        %xmm1, %xmm0
>> +; SSE2-NEXT:   punpcklwd     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3]
>> +; SSE2-NEXT:   movdqa        {{.*#+}} xmm2 = [65535,65535,65535,65535]
>> +; SSE2-NEXT:   pand  %xmm2, %xmm0
>> +; SSE2-NEXT:   punpckhwd     %xmm1, %xmm1    # xmm1 = xmm1[4,4,5,5,6,6,7,7]
>> +; SSE2-NEXT:   pand  %xmm2, %xmm1
>> +; SSE2-NEXT:   retq
>> +
>> +; SSSE3-LABEL: load_zext_8i16_to_8i32:
>> +; SSSE3:        # BB#0: # %entry
>> +; SSSE3-NEXT:   movdqa        (%rdi), %xmm1
>> +; SSSE3-NEXT:   movdqa        %xmm1, %xmm0
>> +; SSSE3-NEXT:   punpcklwd     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3]
>> +; SSSE3-NEXT:   movdqa        {{.*#+}} xmm2 = [65535,65535,65535,65535]
>> +; SSSE3-NEXT:   pand  %xmm2, %xmm0
>> +; SSSE3-NEXT:   punpckhwd     %xmm1, %xmm1    # xmm1 = xmm1[4,4,5,5,6,6,7,7]
>> +; SSSE3-NEXT:   pand  %xmm2, %xmm1
>> +; SSSE3-NEXT:   retq
>> +
>> +; SSE41-LABEL: load_zext_8i16_to_8i32:
>> +; SSE41:        # BB#0: # %entry
>> +; SSE41-NEXT: movdqa (%rdi), %xmm1
>> +; SSE41-NEXT: pmovzxwd %xmm1, %xmm0
>> +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
>> +; SSE41-NEXT: pand %xmm2, %xmm0
>> +; SSE41-NEXT: punpckhwd %xmm1, %xmm1    # xmm1 = xmm1[4,4,5,5,6,6,7,7]
>> +; SSE41-NEXT: pand %xmm2, %xmm1
>> +; SSE41-NEXT: retq
>> +
>> +; AVX1-LABEL: load_zext_8i16_to_8i32:
>> +; AVX1:        # BB#0: # %entry
>> +; AVX1-NEXT:    vmovdqa       (%rdi), %xmm0
>> +; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
>> +; AVX1-NEXT:    vpunpckhwd    %xmm1, %xmm0, %xmm1 # xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
>> +; AVX1-NEXT:    vpmovzxwd     %xmm0, %xmm0
>> +; AVX1-NEXT:    vinsertf128   $1, %xmm1, %ymm0, %ymm0
>> +; AVX1-NEXT:    retq
>> +
>> +; AVX2-LABEL: load_zext_8i16_to_8i32:
>> +; AVX2:        # BB#0: # %entry
>> +; AVX2-NEXT: vpmovzxwd (%rdi), %ymm0
>> +; AVX2-NEXT: retq
>> +entry:
>> + %X = load <8 x i16>* %ptr
>> + %Y = zext <8 x i16> %X to <8 x i32>
>> + ret <8 x i32>%Y
>> +}
>> +
>> +define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
>> +; SSE2-LABEL: load_zext_4i32_to_4i64:
>> +; SSE2:          # BB#0: # %entry
>> +; SSE2-NEXT:   movdqa        (%rdi), %xmm1
>> +; SSE2-NEXT:   pshufd        $-44, %xmm1, %xmm0      # xmm0 = xmm1[0,1,1,3]
>> +; SSE2-NEXT:   movdqa        {{.*#+}} xmm2 = [4294967295,4294967295]
>> +; SSE2-NEXT:   pand  %xmm2, %xmm0
>> +; SSE2-NEXT:   pshufd        $-6, %xmm1, %xmm1       # xmm1 = xmm1[2,2,3,3]
>> +; SSE2-NEXT:   pand  %xmm2, %xmm1
>> +; SSE2-NEXT:   retq
>> +
>> +; SSSE3-LABEL: load_zext_4i32_to_4i64:
>> +; SSSE3:        # BB#0: # %entry
>> +; SSSE3-NEXT:   movdqa        (%rdi), %xmm1
>> +; SSSE3-NEXT:   pshufd        $-44, %xmm1, %xmm0      # xmm0 = xmm1[0,1,1,3]
>> +; SSSE3-NEXT:   movdqa        {{.*#+}} xmm2 = [4294967295,4294967295]
>> +; SSSE3-NEXT:   pand  %xmm2, %xmm0
>> +; SSSE3-NEXT:   pshufd        $-6, %xmm1, %xmm1       # xmm1 = xmm1[2,2,3,3]
>> +; SSSE3-NEXT:   pand  %xmm2, %xmm1
>> +; SSSE3-NEXT:   retq
>> +
>> +; SSE41-LABEL: load_zext_4i32_to_4i64:
>> +; SSE41:        # BB#0: # %entry
>> +; SSE41-NEXT: movdqa (%rdi), %xmm1
>> +; SSE41-NEXT: pmovzxdq %xmm1, %xmm0
>> +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
>> +; SSE41-NEXT: pand %xmm2, %xmm0
>> +; SSE41-NEXT: pshufd $-6, %xmm1, %xmm1       # xmm1 = xmm1[2,2,3,3]
>> +; SSE41-NEXT: pand %xmm2, %xmm1
>> +; SSE41-NEXT: retq
>> +
>> +; AVX1-LABEL: load_zext_4i32_to_4i64:
>> +; AVX1:        # BB#0: # %entry
>> +; AVX1-NEXT: vmovdqa (%rdi), %xmm0
>> +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
>> +; AVX1-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm1 # xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
>> +; AVX1-NEXT: vpmovzxdq %xmm0, %xmm0
>> +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
>> +; AVX1-NEXT: retq
>> +
>> +; AVX2-LABEL: load_zext_4i32_to_4i64:
>> +; AVX2:        # BB#0: # %entry
>> +; AVX2-NEXT: vpmovzxdq (%rdi), %ymm0
>> +; AVX2-NEXT: retq
>> +entry:
>> + %X = load <4 x i32>* %ptr
>> + %Y = zext <4 x i32> %X to <4 x i64>
>> + ret <4 x i64>%Y
>> +}
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>> 
>> 





More information about the llvm-commits mailing list