[llvm-commits] [llvm] r155437 - in /llvm/trunk: lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-vbroadcast.ll

Craig Topper craig.topper at gmail.com
Tue Apr 24 07:58:54 PDT 2012


Why do we need a pattern for v2f64? There is no load version of the
broadcast for that type. Also, don't you need integer versions as well?

On Tue, Apr 24, 2012 at 4:07 AM, Nadav Rotem <nadav.rotem at intel.com> wrote:

> Author: nadav
> Date: Tue Apr 24 06:07:03 2012
> New Revision: 155437
>
> URL: http://llvm.org/viewvc/llvm-project?rev=155437&view=rev
> Log:
> AVX: We lower VECTOR_SHUFFLE and BUILD_VECTOR nodes into vbroadcast instructions
> using the pattern (vbroadcast (i32load src)). In some cases, after we generate
> this pattern, new users are added to the load node, which prevents the selection
> of the blend pattern. This commit provides fallback patterns which perform
> in-vector broadcast (using in-vector vbroadcast in AVX2 and pshufd on AVX1).
>
>
> Modified:
>    llvm/trunk/lib/Target/X86/X86InstrSSE.td
>    llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=155437&r1=155436&r2=155437&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 24 06:07:03 2012
> @@ -7723,6 +7723,20 @@
>           (VPBROADCASTQrm addr:$src)>;
>   def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
>           (VPBROADCASTQYrm addr:$src)>;
> +
> +  // Provide fallback in case the load node that is used in the patterns
> above
> +  // is used by additional users, which prevents the pattern selection.
> +  let AddedComplexity = 20 in {
> +    def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
> +              (VBROADCASTSSrr
> +              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
> +    def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
> +              (VBROADCASTSSYrr
> +              (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
> +    def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
> +              (VBROADCASTSDrr
> +              (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
> +  }
>  }
>
>  // AVX1 broadcast patterns
> @@ -7735,11 +7749,38 @@
>           (VBROADCASTSSYrm addr:$src)>;
>  def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
>           (VBROADCASTSDrm addr:$src)>;
> -
>  def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
>           (VBROADCASTSSrm addr:$src)>;
>  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
>           (VBROADCASTSSrm addr:$src)>;
> +
> +  // Provide fallback in case the load node that is used in the patterns
> above
> +  // is used by additional users, which prevents the pattern selection.
> +  let AddedComplexity = 20 in {
> +  // 128bit broadcasts:
> +  def : Pat<(v2f64 (X86VBroadcast FR64:$src)),
> +            (VPSHUFDri
> +            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
> 0)>;
> +  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
> +            (VPSHUFDri
> +            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
> 0)>;
> +  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
> +            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
> +              (VPSHUFDri
> +                (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
> sub_ss), 0),
> +                  sub_xmm),
> +              (VPSHUFDri
> +                (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
> +               0), 1)>;
> +  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
> +            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
> +              (VPSHUFDri
> +                (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
> sub_sd), 0),
> +                  sub_xmm),
> +              (VPSHUFDri
> +                (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
> +              0), 1)>;
> +  }
>  }
>
>
>  //===----------------------------------------------------------------------===//
>
> Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=155437&r1=155436&r2=155437&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Tue Apr 24 06:07:03 2012
> @@ -160,6 +160,15 @@
>   ret <8 x i32> %g
>  }
>
> +; CHECK: V113
> +; CHECK: vbroadcastss
> +; CHECK: ret
> +define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
> +entry:
> +  %g = fadd <8 x float> %in, <float 0xbf80000000000000, float
> 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000,
> float 0xbf80000000000000, float 0xbf80000000000000, float
> 0xbf80000000000000, float 0xbf80000000000000>
> +  ret <8 x float> %g
> +}
> +
>  ; CHECK: _e2
>  ; CHECK: vbroadcastss
>  ; CHECK: ret
> @@ -179,9 +188,37 @@
>   %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
>   %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
>   %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
> -  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
> -  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
> -  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
> -  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
> +  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
> +  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
> +  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
> +  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
>   ret <8 x i8> %vecinit7.i
>  }
> +
> +
> +define void @crash() nounwind alwaysinline {
> +WGLoopsEntry:
> +  br i1 undef, label %ret, label %footer329VF
> +
> +footer329VF:
> +  %A.0.inVF = fmul float undef, 6.553600e+04
> +  %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float
> 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04,
> float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
> +  %A.0VF = fptosi float %A.0.inVF to i32
> +  %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
> +  %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> +  %1 = and i32 %A.0VF, 65535
> +  %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
> +  %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef,
> <8 x i32> zeroinitializer
> +  br i1 undef, label %preload1201VF, label %footer349VF
> +
> +preload1201VF:
> +  br label %footer349VF
> +
> +footer349VF:
> +  %2 = mul nsw <8 x i32> undef, %0
> +  %3 = mul nsw <8 x i32> undef, %vector1099VF
> +  br label %footer329VF
> +
> +ret:
> +  ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>



-- 
~Craig
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20120424/19257079/attachment.html>


More information about the llvm-commits mailing list