[llvm-commits] [llvm] r146191 - in /llvm/trunk: lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/2011-12-08-AVXISelBugs.ll

Thu Dec 8 14:11:27 PST 2011

AVX experts, please review this patch.

Thanks,

Evan

On Dec 8, 2011, at 2:05 PM, Evan Cheng wrote:

> Author: evancheng
> Date: Thu Dec  8 16:05:28 2011
> New Revision: 146191
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=146191&view=rev
> Log:
> Add various missing AVX patterns which were causing crashes. Sadly, the generated
> code looks pretty bad compared to SSE.
> 
> rdar://10538793
> 
> Added:
>    llvm/trunk/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
> Modified:
>    llvm/trunk/lib/Target/X86/X86InstrSSE.td
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=146191&r1=146190&r2=146191&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Dec  8 16:05:28 2011
> @@ -561,6 +561,16 @@
>                       (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
>   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
>             (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
> +
> +  // Move low f32 and clear high bits.
> +  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
> +            (SUBREG_TO_REG (i32 0),
> +              (VMOVSSrr (v4f32 (V_SET0)),
> +                        (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
> +  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
> +            (SUBREG_TO_REG (i32 0),
> +              (VMOVSSrr (v4i32 (V_SET0)),
> +                        (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
>   }
> 
>   let AddedComplexity = 20 in {
> @@ -588,6 +598,9 @@
> 
>   // Represent the same patterns above but in the form they appear for
>   // 256-bit types
> +  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
> +                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
> +            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
>   def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
>                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
>             (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
> @@ -606,6 +619,12 @@
>                            (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
>                            sub_xmm)>;
> 
> +  // Move low f64 and clear high bits.
> +  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
> +            (SUBREG_TO_REG (i32 0),
> +              (VMOVSDrr (v2f64 (V_SET0)),
> +                        (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
> +
>   // Extract and store.
>   def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
>                    addr:$dst),
> @@ -756,6 +775,19 @@
>                             "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
> }
> 
> +let Predicates = [HasAVX] in {
> +def : Pat<(v8i32 (X86vzmovl
> +                        (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
> +          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
> +def : Pat<(v8f32 (X86vzmovl
> +                        (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
> +          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
> +def : Pat<(v4f64 (X86vzmovl
> +                        (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
> +          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
> +}
> +
> +
> def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
> def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
>           (VMOVUPSYmr addr:$dst, VR256:$src)>;
> 
> Added: llvm/trunk/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll?rev=146191&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll Thu Dec  8 16:05:28 2011
> @@ -0,0 +1,63 @@
> +; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx
> +; Various missing patterns causing crashes.
> +; rdar://10538793
> +
> +define void @t1() nounwind {
> +entry:
> +  br label %loop.cond
> +
> +loop.cond:                                        ; preds = %t1.exit, %entry
> +  br i1 false, label %return, label %loop
> +
> +loop:                                             ; preds = %loop.cond
> +  br i1 undef, label %0, label %t1.exit
> +
> +; <label>:0                                       ; preds = %loop
> +  %1 = load <16 x i32> addrspace(1)* undef, align 64
> +  %2 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %1, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0>
> +  store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
> +  br label %t1.exit
> +
> +t1.exit:                                 ; preds = %0, %loop
> +  br label %loop.cond
> +
> +return:                                           ; preds = %loop.cond
> +  ret void
> +}
> +
> +define void @t2() nounwind {
> +  br i1 undef, label %1, label %4
> +
> +; <label>:1                                       ; preds = %0
> +  %2 = load <16 x i32> addrspace(1)* undef, align 64
> +  %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0>
> +  store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
> +  br label %4
> +
> +; <label>:4                                       ; preds = %1, %0
> +  ret void
> +}
> +
> +define void @t3() nounwind {
> +entry:
> +  br label %loop.cond
> +
> +loop.cond:                                        ; preds = %t2.exit, %entry
> +  br i1 false, label %return, label %loop
> +
> +loop:                                             ; preds = %loop.cond
> +  br i1 undef, label %0, label %t2.exit
> +
> +; <label>:0                                       ; preds = %loop
> +  %1 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0>
> +  %2 = load <16 x i32> addrspace(1)* undef, align 64
> +  %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
> +  store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
> +  br label %t2.exit
> +
> +t2.exit:                                 ; preds = %0, %loop
> +  br label %loop.cond
> +
> +return:                                           ; preds = %loop.cond
> +  ret void
> +}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits