[llvm] r237903 - AVX-512: Enabled SSE intrinsics on AVX-512.

Ahmed Bougacha ahmed.bougacha at gmail.com
Thu May 21 17:11:51 PDT 2015


On Thu, May 21, 2015 at 7:01 AM, Elena Demikhovsky
<elena.demikhovsky at intel.com> wrote:
> Author: delena
> Date: Thu May 21 09:01:32 2015
> New Revision: 237903
>
> URL: http://llvm.org/viewvc/llvm-project?rev=237903&view=rev
> Log:
> AVX-512: Enabled SSE intrinsics on AVX-512.
> Predicate UseAVX deprecates pattern selection on AVX-512.
> This predicate is necessary for DAG selection to select EVEX form.
> But mapping SSE intrinsics to AVX-512 instructions is not ready yet.
> So I replaced UseAVX with HasAVX for intrinsics patterns.
>
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86InstrFormats.td
>     llvm/trunk/lib/Target/X86/X86InstrSSE.td
>     llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
>     llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
>     llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrFormats.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFormats.td?rev=237903&r1=237902&r2=237903&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFormats.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFormats.td Thu May 21 09:01:32 2015
> @@ -442,12 +442,29 @@ class SI<bits<8> o, Format F, dag outs,
>                    asm));
>  }
>
> -// SIi8 - SSE 1 & 2 scalar instructions
> +// SI - SSE 1 & 2 scalar intrinsics - vex form available on AVX512
> +class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
> +         list<dag> pattern, InstrItinClass itin = NoItinerary,
> +         Domain d = GenericDomain>
> +      : I<o, F, outs, ins, asm, pattern, itin, d> {
> +  let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
> +                   !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
> +                   !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
> +                   !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
> +                   !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
> +                   [UseSSE1])))));
> +
> +  // AVX instructions have a 'v' prefix in the mnemonic
> +  let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
> +                  !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
> +                  asm));
> +}
> +// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512
>  class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
>             list<dag> pattern, InstrItinClass itin = NoItinerary>
>        : Ii8<o, F, outs, ins, asm, pattern, itin> {
>    let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
> -                   !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
> +                   !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],

We don't need (or want) this change (UseAVX to HasAVX in SIi8), right?

-Ahmed

>                     !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
>                     [UseSSE2])));
>
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=237903&r1=237902&r2=237903&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu May 21 09:01:32 2015
> @@ -264,7 +264,7 @@ multiclass sse12_fp_scalar_int<bits<8> o
>                               Operand memopr, ComplexPattern mem_cpat,
>                               Domain d, OpndItins itins, bit Is2Addr = 1> {
>  let isCodeGenOnly = 1 in {
> -  def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
> +  def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
>         !if(Is2Addr,
>             !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
>             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
> @@ -272,7 +272,7 @@ let isCodeGenOnly = 1 in {
>                   !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
>               RC:$src1, RC:$src2))], itins.rr, d>,
>         Sched<[itins.Sched]>;
> -  def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
> +  def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
>         !if(Is2Addr,
>             !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
>             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
> @@ -1851,14 +1851,14 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
>                         "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
>                         [(set VR128:$dst,
>                           (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
> -                       IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
> +                       IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
>                         Sched<[WriteCvtF2F]>;
>  def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
>                         (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
>                         "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
>                         [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
>                                            VR128:$src1, sse_load_f64:$src2))],
> -                       IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
> +                       IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
>                         Sched<[WriteCvtF2FLd, ReadAfterLd]>;
>
>  let Constraints = "$src1 = $dst" in {
> @@ -1936,14 +1936,14 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
>                      "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
>                      [(set VR128:$dst,
>                        (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
> -                    IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
> +                    IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
>                      Sched<[WriteCvtF2F]>;
>  def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
>                        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
>                      "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
>                      [(set VR128:$dst,
>                        (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
> -                    IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
> +                    IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
>                      Sched<[WriteCvtF2FLd, ReadAfterLd]>;
>  let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
>  def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
> @@ -3380,7 +3380,7 @@ multiclass avx_fp_unop_s<bits<8> opc, st
>                            X86MemOperand x86memop, Operand vec_memop,
>                            ComplexPattern mem_cpat,
>                            Intrinsic Intr, SDNode OpNode, Domain d,
> -                          OpndItins itins, Predicate target, string Suffix> {
> +                          OpndItins itins, string Suffix> {
>    let hasSideEffects = 0 in {
>    def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
>              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> @@ -3402,7 +3402,7 @@ multiclass avx_fp_unop_s<bits<8> opc, st
>    }
>    }
>
> -  let Predicates = [target] in {
> +  let Predicates = [UseAVX] in {
>     def : Pat<(OpNode RC:$src),  (!cast<Instruction>("V"#NAME#Suffix##r)
>                                  (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
>
> @@ -3410,6 +3410,8 @@ multiclass avx_fp_unop_s<bits<8> opc, st
>               (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
>                                    mem_cpat:$src)>;
>
> +  }
> +  let Predicates = [HasAVX] in {
>     def : Pat<(Intr VR128:$src),
>               (!cast<Instruction>("V"#NAME#Suffix##r_Int) (vt (IMPLICIT_DEF)),
>                                   VR128:$src)>;
> @@ -3418,7 +3420,7 @@ multiclass avx_fp_unop_s<bits<8> opc, st
>               (!cast<Instruction>("V"#NAME#Suffix##m_Int)
>                      (vt (IMPLICIT_DEF)), mem_cpat:$src)>;
>    }
> -  let Predicates = [target, OptForSize] in
> +  let Predicates = [UseAVX, OptForSize] in
>    def : Pat<(ScalarVT (OpNode (load addr:$src))),
>              (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
>               addr:$src)>;
> @@ -3505,7 +3507,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, s
>    defm V#NAME#SS  : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
>                        f32mem, ssmem, sse_load_f32,
>                        !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
> -                      SSEPackedSingle, itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;
> +                      SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG;
>  }
>
>  multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
> @@ -3517,7 +3519,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, s
>    defm V#NAME#SD  : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
>                           f64mem, sdmem, sse_load_f64,
>                           !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
> -                         OpNode, SSEPackedDouble, itins, UseAVX, "SD">,
> +                         OpNode, SSEPackedDouble, itins, "SD">,
>                           XD, VEX_4V, VEX_LIG;
>  }
>
> @@ -4980,7 +4982,7 @@ def MOVPQI2QIrr : S2I<0xD6, MRMDestReg,
>  //===---------------------------------------------------------------------===//
>  // Store / copy lower 64-bits of a XMM register.
>  //
> -let Predicates = [UseAVX] in
> +let Predicates = [HasAVX] in
>  def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
>            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
>  let Predicates = [UseSSE2] in
>
> Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll?rev=237903&r1=237902&r2=237903&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll Thu May 21 09:01:32 2015
> @@ -1,4 +1,5 @@
>  ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s
> +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
>
>  define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
>    ; CHECK: addss
>
> Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=237903&r1=237902&r2=237903&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Thu May 21 09:01:32 2015
> @@ -1,4 +1,5 @@
> -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s
> +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
> +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
>
>  define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
>    ; CHECK: addsd
> @@ -142,7 +143,7 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x
>
>  define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
>    ; CHECK: cvtsd2ss
> -  ; CHECK-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
> +  ; SSE-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
>    %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
>    ret <4 x float> %res
>  }
>
> Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll?rev=237903&r1=237902&r2=237903&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll Thu May 21 09:01:32 2015
> @@ -1,4 +1,5 @@
>  ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
> +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
>
>  define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
>    ; CHECK: blendpd
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list