[llvm] r276614 - [AVX512] Add some additional patterns so that we can fold broadcast loads in the first argument of an FMADD/FMSUB/FNMADD/FNMSUB/FMADDSUB/FMSUBADD node. Also add patterns to support all combinations of the broadcast input and the preserved input for masked versions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 25 00:20:32 PDT 2016
Author: ctopper
Date: Mon Jul 25 02:20:31 2016
New Revision: 276614
URL: http://llvm.org/viewvc/llvm-project?rev=276614&view=rev
Log:
[AVX512] Add some additional patterns so that we can fold broadcast loads in the first argument of an FMADD/FMSUB/FNMADD/FNMSUB/FMADDSUB/FMSUBADD node. Also add patterns to support all combinations of the broadcast input and the preserved input for masked versions.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=276614&r1=276613&r2=276614&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Jul 25 02:20:31 2016
@@ -4756,10 +4756,9 @@ let Predicates = [HasAVX512] in {
// FMA - Fused Multiply Operations
//
-let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -4780,40 +4779,48 @@ multiclass avx512_fma3p_213_rm<bits<8> o
_.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
+
+ // Additional pattern for folding broadcast nodes in other orders.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1, _.RC:$src2,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc)))>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
-} // Constraints = "$src1 = $dst"
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
+ string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>,
- avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
- EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
+ avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
+ Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>,
+ defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>,
+ defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
+ SDNode OpNodeRnd > {
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- avx512vl_f32_info>;
+ avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- avx512vl_f64_info>, VEX_W;
+ avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
@@ -4824,10 +4831,9 @@ defm VFNMADD213 : avx512_fma3p_213_f<0
defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
-let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -4848,40 +4854,58 @@ multiclass avx512_fma3p_231_rm<bits<8> o
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1))>, AVX512FMA3Base, EVEX_B;
}
+
+ // Additional patterns for folding broadcast nodes in other orders.
+ def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mb) _.RC:$src1,
+ _.RC:$src2, addr:$src3)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbkz) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
-} // Constraints = "$src1 = $dst"
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
+ string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>,
- avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
- EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
+ avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
+ Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>,
+ defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>,
+ defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
+ SDNode OpNodeRnd > {
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- avx512vl_f32_info>;
+ avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- avx512vl_f64_info>, VEX_W;
+ avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
@@ -4891,10 +4915,9 @@ defm VFMSUBADD231 : avx512_fma3p_231_f<0
defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
-let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -4915,40 +4938,48 @@ multiclass avx512_fma3p_132_rm<bits<8> o
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src2))>, AVX512FMA3Base, EVEX_B;
}
+
+ // Additional patterns for folding broadcast nodes in other orders.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src1, _.RC:$src2),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc)))>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
-} // Constraints = "$src1 = $dst"
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
+ string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>,
- avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
- EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
+ avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
+ Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>,
+ defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>,
+ defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
+ SDNode OpNodeRnd > {
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- avx512vl_f32_info>;
+ avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- avx512vl_f64_info>, VEX_W;
+ avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=276614&r1=276613&r2=276614&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Mon Jul 25 02:20:31 2016
@@ -1864,8 +1864,7 @@ define <4 x float> @test_mask_vfmadd128_
; CHECK-LABEL: test_mask_vfmadd128_ps_rmb:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x18,0x17]
-; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
+; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%q = load float, float* %ptr_a2
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -1880,8 +1879,7 @@ define <4 x float> @test_mask_vfmadd128_
; CHECK-LABEL: test_mask_vfmadd128_ps_rmba:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
-; CHECK-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x18,0x17]
-; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
+; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%q = load float, float* %ptr_a2, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
More information about the llvm-commits mailing list