[llvm] r220540 - [AVX512] FMA support for the 231 variants

Adam Nemet anemet at apple.com
Mon Feb 2 14:54:31 PST 2015


> On Feb 1, 2015, at 3:22 PM, Hal Finkel <hfinkel at anl.gov> wrote:
> 
> ----- Original Message -----
>> From: "Adam Nemet" <anemet at apple.com>
>> To: llvm-commits at cs.uiuc.edu
>> Sent: Thursday, October 23, 2014 7:03:00 PM
>> Subject: [llvm] r220540 - [AVX512] FMA support for the 231 variants
>> 
>> Author: anemet
>> Date: Thu Oct 23 19:03:00 2014
>> New Revision: 220540
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=220540&view=rev
>> Log:
>> [AVX512] FMA support for the 231 variants
>> 
>> This is asm/disasm-only support, similar to AVX.
>> 
>> For ISeling the register variant, they are no different from 213
>> other than
>> whether the multiplication or the addition operand is destructed.
>> 
>> For ISeling the memory variant, i.e. to fold a load, they are no
>> different
>> than the 132 variant.  The addition operand (op3) in both cases can
>> come from
>> memory.  Again the only difference is which operand is destructed.
>> 
>> There could be a post-RA pass that would convert a 213 or 132 into a
>> 231.
> 
> Hi Adam,
> 
> If I understand the situation correctly, the PPC backend solves the same problem for VSX FMA instructions (for the register-operand case) using the pass in lib/Target/PowerPC/PPCVSXFMAMutate.cpp. The PPCVSXFMAMutate pass runs in between MI scheduling and RA, and mutates the FMA form (from the addend-destructing form to the multiplicand-destructing form when doing so will eliminate a copy). I think that this could be made target-independent pretty easily, and then we could handle the AVX-512 FMA mutation as well. What do you think?

Hi Hal,

Yes, that’s the same idea.  Thanks for the pointer.  Copying Elena as well.

Adam

> Thanks again,
> Hal
> 
>> 
>> Part of <rdar://problem/17082571>
>> 
>> Modified:
>>    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>>    llvm/trunk/test/MC/X86/avx512-encodings.s
>> 
>> Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=220540&r1=220539&r2=220540&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Oct 23 19:03:00
>> 2014
>> @@ -3356,40 +3356,44 @@ multiclass avx512_fma3p_rm<bits<8> opc,
>> }
>> } // Constraints = "$src1 = $dst"
>> 
>> -multiclass avx512_fma3p_forms<bits<8> opc213,
>> +multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
>>                               string OpcodeStr, X86VectorVTInfo VTI,
>>                               SDPatternOperator OpNode> {
>>   defm v213 : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213",
>>   VTI.Suffix),
>>                               VTI, OpNode>,
>>               EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
>> +
>> +  defm v231 : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231",
>> VTI.Suffix),
>> +                              VTI>,
>> +              EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
>> }
>> 
>> let ExeDomain = SSEPackedSingle in {
>> -  defm VFMADDPSZ    : avx512_fma3p_forms<0xA8, "vfmadd",
>> +  defm VFMADDPSZ    : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
>>                                          v16f32_info, X86Fmadd>;
>> -  defm VFMSUBPSZ    : avx512_fma3p_forms<0xAA, "vfmsub",
>> +  defm VFMSUBPSZ    : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
>>                                          v16f32_info, X86Fmsub>;
>> -  defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, "vfmaddsub",
>> +  defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
>>                                          v16f32_info, X86Fmaddsub>;
>> -  defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, "vfmsubadd",
>> +  defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
>>                                          v16f32_info, X86Fmsubadd>;
>> -  defm VFNMADDPSZ   : avx512_fma3p_forms<0xAC, "vfnmadd",
>> +  defm VFNMADDPSZ   : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
>>                                          v16f32_info, X86Fnmadd>;
>> -  defm VFNMSUBPSZ   : avx512_fma3p_forms<0xAE, "vfnmsub",
>> +  defm VFNMSUBPSZ   : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
>>                                          v16f32_info, X86Fnmsub>;
>> }
>> let ExeDomain = SSEPackedDouble in {
>> -  defm VFMADDPDZ    : avx512_fma3p_forms<0xA8, "vfmadd",
>> +  defm VFMADDPDZ    : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
>>                                          v8f64_info, X86Fmadd>,
>>                                          VEX_W;
>> -  defm VFMSUBPDZ    : avx512_fma3p_forms<0xAA, "vfmsub",
>> +  defm VFMSUBPDZ    : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
>>                                          v8f64_info, X86Fmsub>,
>>                                          VEX_W;
>> -  defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, "vfmaddsub",
>> +  defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
>>                                          v8f64_info, X86Fmaddsub>,
>>                                          VEX_W;
>> -  defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, "vfmsubadd",
>> +  defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
>>                                          v8f64_info, X86Fmsubadd>,
>>                                          VEX_W;
>> -  defm VFNMADDPDZ :   avx512_fma3p_forms<0xAC, "vfnmadd",
>> +  defm VFNMADDPDZ :   avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
>>                                          v8f64_info, X86Fnmadd>,
>>                                          VEX_W;
>> -  defm VFNMSUBPDZ :   avx512_fma3p_forms<0xAE, "vfnmsub",
>> +  defm VFNMSUBPDZ :   avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
>>                                          v8f64_info, X86Fnmsub>,
>>                                          VEX_W;
>> }
>> 
>> 
>> Modified: llvm/trunk/test/MC/X86/avx512-encodings.s
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512-encodings.s?rev=220540&r1=220539&r2=220540&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/MC/X86/avx512-encodings.s (original)
>> +++ llvm/trunk/test/MC/X86/avx512-encodings.s Thu Oct 23 19:03:00
>> 2014
>> @@ -4351,3 +4351,27 @@ vextractf32x4  $3, %zmm3, %xmm1 {%k1}
>> // CHECK: vextracti64x4 $1
>> // CHECK: encoding: [0x62,0x53,0xfd,0xcb,0x3b,0xf4,0x01]
>> vextracti64x4  $1, %zmm14, %ymm12 {%k3} {z}
>> +
>> +// CHECK: vfmadd231ps
>> +// CHECK: encoding: [0x62,0xb2,0x1d,0x48,0xb8,0xe7]
>> +vfmadd231ps %zmm23, %zmm12, %zmm4
>> +
>> +// CHECK: vfmsub231pd
>> +// CHECK: encoding: [0x62,0xe2,0xed,0x48,0xba,0x73,0x08]
>> +vfmsub231pd 0x200(%rbx), %zmm2, %zmm22
>> +
>> +// CHECK: vfmaddsub231ps
>> +// CHECK: encoding: [0x62,0xd2,0x65,0x4b,0xb6,0xec]
>> +vfmaddsub231ps %zmm12, %zmm3, %zmm5 {%k3}
>> +
>> +// CHECK: vfmsubadd231pd
>> +// CHECK: encoding: [0x62,0x72,0x85,0xc5,0xb7,0xdd]
>> +vfmsubadd231pd %zmm5, %zmm31, %zmm11 {%k5}{z}
>> +
>> +// CHECK: vfnmadd231ps
>> +// CHECK: encoding: [0x62,0xf2,0x4d,0x48,0xbc,0xfd]
>> +vfnmadd231ps %zmm5, %zmm6, %zmm7
>> +
>> +// CHECK: vfnmsub231pd
>> +// CHECK: encoding: [0x62,0xf2,0xcd,0x48,0xbe,0xfd]
>> +vfnmsub231pd %zmm5, %zmm6, %zmm7
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>> 
> 
> -- 
> Hal Finkel
> Assistant Computational Scientist
> Leadership Computing Facility
> Argonne National Laboratory

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150202/c0bfa20c/attachment.html>


More information about the llvm-commits mailing list