[llvm] r220540 - [AVX512] FMA support for the 231 variants

Hal Finkel hfinkel at anl.gov
Sun Feb 1 15:22:17 PST 2015


----- Original Message -----
> From: "Adam Nemet" <anemet at apple.com>
> To: llvm-commits at cs.uiuc.edu
> Sent: Thursday, October 23, 2014 7:03:00 PM
> Subject: [llvm] r220540 - [AVX512] FMA support for the 231 variants
> 
> Author: anemet
> Date: Thu Oct 23 19:03:00 2014
> New Revision: 220540
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=220540&view=rev
> Log:
> [AVX512] FMA support for the 231 variants
> 
> This is asm/disasm-only support, similar to AVX.
> 
> For ISeling the register variant, they are no different from 213
> other than
> whether the multiplication or the addition operand is destructed.
> 
> For ISeling the memory variant, i.e. to fold a load, they are no
> different
> than the 132 variant.  The addition operand (op3) in both cases can
> come from
> memory.  Again the only difference is which operand is destructed.
> 
> There could be a post-RA pass that would convert a 213 or 132 into a
> 231.

Hi Adam,

If I understand the situation correctly, the PPC backend solves the same problem for VSX FMA instructions (for the register-operand case) using the pass in lib/Target/PowerPC/PPCVSXFMAMutate.cpp. The PPCVSXFMAMutate pass runs in between MI scheduling and RA, and mutates the FMA form (from the addend-destructing form to the multiplicand-destructing form when doing so will eliminate a copy). I think that this could be made target-independent pretty easily, and then we could handle the AVX-512 FMA mutation as well. What do you think?

Thanks again,
Hal

> 
> Part of <rdar://problem/17082571>
> 
> Modified:
>     llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>     llvm/trunk/test/MC/X86/avx512-encodings.s
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=220540&r1=220539&r2=220540&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Oct 23 19:03:00
> 2014
> @@ -3356,40 +3356,44 @@ multiclass avx512_fma3p_rm<bits<8> opc,
>  }
>  } // Constraints = "$src1 = $dst"
>  
> -multiclass avx512_fma3p_forms<bits<8> opc213,
> +multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
>                                string OpcodeStr, X86VectorVTInfo VTI,
>                                SDPatternOperator OpNode> {
>    defm v213 : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213",
>    VTI.Suffix),
>                                VTI, OpNode>,
>                EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
> +
> +  defm v231 : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231",
> VTI.Suffix),
> +                              VTI>,
> +              EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
>  }
>  
>  let ExeDomain = SSEPackedSingle in {
> -  defm VFMADDPSZ    : avx512_fma3p_forms<0xA8, "vfmadd",
> +  defm VFMADDPSZ    : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
>                                           v16f32_info, X86Fmadd>;
> -  defm VFMSUBPSZ    : avx512_fma3p_forms<0xAA, "vfmsub",
> +  defm VFMSUBPSZ    : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
>                                           v16f32_info, X86Fmsub>;
> -  defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, "vfmaddsub",
> +  defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
>                                           v16f32_info, X86Fmaddsub>;
> -  defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, "vfmsubadd",
> +  defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
>                                           v16f32_info, X86Fmsubadd>;
> -  defm VFNMADDPSZ   : avx512_fma3p_forms<0xAC, "vfnmadd",
> +  defm VFNMADDPSZ   : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
>                                           v16f32_info, X86Fnmadd>;
> -  defm VFNMSUBPSZ   : avx512_fma3p_forms<0xAE, "vfnmsub",
> +  defm VFNMSUBPSZ   : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
>                                           v16f32_info, X86Fnmsub>;
>  }
>  let ExeDomain = SSEPackedDouble in {
> -  defm VFMADDPDZ    : avx512_fma3p_forms<0xA8, "vfmadd",
> +  defm VFMADDPDZ    : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
>                                           v8f64_info, X86Fmadd>,
>                                           VEX_W;
> -  defm VFMSUBPDZ    : avx512_fma3p_forms<0xAA, "vfmsub",
> +  defm VFMSUBPDZ    : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
>                                           v8f64_info, X86Fmsub>,
>                                           VEX_W;
> -  defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, "vfmaddsub",
> +  defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
>                                           v8f64_info, X86Fmaddsub>,
>                                           VEX_W;
> -  defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, "vfmsubadd",
> +  defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
>                                           v8f64_info, X86Fmsubadd>,
>                                           VEX_W;
> -  defm VFNMADDPDZ :   avx512_fma3p_forms<0xAC, "vfnmadd",
> +  defm VFNMADDPDZ :   avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
>                                           v8f64_info, X86Fnmadd>,
>                                           VEX_W;
> -  defm VFNMSUBPDZ :   avx512_fma3p_forms<0xAE, "vfnmsub",
> +  defm VFNMSUBPDZ :   avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
>                                           v8f64_info, X86Fnmsub>,
>                                           VEX_W;
>  }
>  
> 
> Modified: llvm/trunk/test/MC/X86/avx512-encodings.s
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512-encodings.s?rev=220540&r1=220539&r2=220540&view=diff
> ==============================================================================
> --- llvm/trunk/test/MC/X86/avx512-encodings.s (original)
> +++ llvm/trunk/test/MC/X86/avx512-encodings.s Thu Oct 23 19:03:00
> 2014
> @@ -4351,3 +4351,27 @@ vextractf32x4  $3, %zmm3, %xmm1 {%k1}
>  // CHECK: vextracti64x4 $1
>  // CHECK: encoding: [0x62,0x53,0xfd,0xcb,0x3b,0xf4,0x01]
>  vextracti64x4  $1, %zmm14, %ymm12 {%k3} {z}
> +
> +// CHECK: vfmadd231ps
> +// CHECK: encoding: [0x62,0xb2,0x1d,0x48,0xb8,0xe7]
> +vfmadd231ps %zmm23, %zmm12, %zmm4
> +
> +// CHECK: vfmsub231pd
> +// CHECK: encoding: [0x62,0xe2,0xed,0x48,0xba,0x73,0x08]
> +vfmsub231pd 0x200(%rbx), %zmm2, %zmm22
> +
> +// CHECK: vfmaddsub231ps
> +// CHECK: encoding: [0x62,0xd2,0x65,0x4b,0xb6,0xec]
> +vfmaddsub231ps %zmm12, %zmm3, %zmm5 {%k3}
> +
> +// CHECK: vfmsubadd231pd
> +// CHECK: encoding: [0x62,0x72,0x85,0xc5,0xb7,0xdd]
> +vfmsubadd231pd %zmm5, %zmm31, %zmm11 {%k5}{z}
> +
> +// CHECK: vfnmadd231ps
> +// CHECK: encoding: [0x62,0xf2,0x4d,0x48,0xbc,0xfd]
> +vfnmadd231ps %zmm5, %zmm6, %zmm7
> +
> +// CHECK: vfnmsub231pd
> +// CHECK: encoding: [0x62,0xf2,0xcd,0x48,0xbe,0xfd]
> +vfnmsub231pd %zmm5, %zmm6, %zmm7
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory



More information about the llvm-commits mailing list