[llvm-commits] [llvm] r155437 - in /llvm/trunk: lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-vbroadcast.ll
Craig Topper
craig.topper at gmail.com
Tue Apr 24 07:58:54 PDT 2012
Why do we need a register fallback pattern for v2f64? There's no load version of the broadcast pattern for that type.
Also, don't you need integer versions?
On Tue, Apr 24, 2012 at 4:07 AM, Nadav Rotem <nadav.rotem at intel.com> wrote:
> Author: nadav
> Date: Tue Apr 24 06:07:03 2012
> New Revision: 155437
>
> URL: http://llvm.org/viewvc/llvm-project?rev=155437&view=rev
> Log:
> AVX: We lower VECTOR_SHUFFLE and BUILD_VECTOR nodes into vbroadcast
> instructions using the pattern (vbroadcast (i32load src)). In some cases,
> after we generate this pattern, new users are added to the load node, which
> prevent the selection of the blend pattern. This commit provides fallback
> patterns which perform in-vector broadcast (using in-vector vbroadcast in
> AVX2 and pshufd on AVX1).
>
>
> Modified:
> llvm/trunk/lib/Target/X86/X86InstrSSE.td
> llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=155437&r1=155436&r2=155437&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 24 06:07:03 2012
> @@ -7723,6 +7723,20 @@
> (VPBROADCASTQrm addr:$src)>;
> def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
> (VPBROADCASTQYrm addr:$src)>;
> +
> + // Provide fallback in case the load node that is used in the patterns
> above
> + // is used by additional users, which prevents the pattern selection.
> + let AddedComplexity = 20 in {
> + def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
> + (VBROADCASTSSrr
> + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
> + def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
> + (VBROADCASTSSYrr
> + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
> + def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
> + (VBROADCASTSDrr
> + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
> + }
> }
>
> // AVX1 broadcast patterns
> @@ -7735,11 +7749,38 @@
> (VBROADCASTSSYrm addr:$src)>;
> def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
> (VBROADCASTSDrm addr:$src)>;
> -
> def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
> (VBROADCASTSSrm addr:$src)>;
> def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
> (VBROADCASTSSrm addr:$src)>;
> +
> + // Provide fallback in case the load node that is used in the patterns
> above
> + // is used by additional users, which prevents the pattern selection.
> + let AddedComplexity = 20 in {
> + // 128bit broadcasts:
> + def : Pat<(v2f64 (X86VBroadcast FR64:$src)),
> + (VPSHUFDri
> + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
> 0)>;
> + def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
> + (VPSHUFDri
> + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
> 0)>;
> + def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
> + (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
> + (VPSHUFDri
> + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
> sub_ss), 0),
> + sub_xmm),
> + (VPSHUFDri
> + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
> + 0), 1)>;
> + def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
> + (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
> + (VPSHUFDri
> + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
> sub_sd), 0),
> + sub_xmm),
> + (VPSHUFDri
> + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
> + 0), 1)>;
> + }
> }
>
>
> //===----------------------------------------------------------------------===//
>
> Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=155437&r1=155436&r2=155437&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Tue Apr 24 06:07:03 2012
> @@ -160,6 +160,15 @@
> ret <8 x i32> %g
> }
>
> +; CHECK: V113
> +; CHECK: vbroadcastss
> +; CHECK: ret
> +define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
> +entry:
> + %g = fadd <8 x float> %in, <float 0xbf80000000000000, float
> 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000,
> float 0xbf80000000000000, float 0xbf80000000000000, float
> 0xbf80000000000000, float 0xbf80000000000000>
> + ret <8 x float> %g
> +}
> +
> ; CHECK: _e2
> ; CHECK: vbroadcastss
> ; CHECK: ret
> @@ -179,9 +188,37 @@
> %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
> %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
> %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
> - %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
> - %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
> - %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
> - %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
> + %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
> + %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
> + %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
> + %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
> ret <8 x i8> %vecinit7.i
> }
> +
> +
> +define void @crash() nounwind alwaysinline {
> +WGLoopsEntry:
> + br i1 undef, label %ret, label %footer329VF
> +
> +footer329VF:
> + %A.0.inVF = fmul float undef, 6.553600e+04
> + %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float
> 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04,
> float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
> + %A.0VF = fptosi float %A.0.inVF to i32
> + %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
> + %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32
> 65535, i32 65535, i32 65535, i32 65535, i32 65535>
> + %1 = and i32 %A.0VF, 65535
> + %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
> + %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef,
> <8 x i32> zeroinitializer
> + br i1 undef, label %preload1201VF, label %footer349VF
> +
> +preload1201VF:
> + br label %footer349VF
> +
> +footer349VF:
> + %2 = mul nsw <8 x i32> undef, %0
> + %3 = mul nsw <8 x i32> undef, %vector1099VF
> + br label %footer329VF
> +
> +ret:
> + ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
~Craig
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20120424/19257079/attachment.html>
More information about the llvm-commits
mailing list