[llvm] r290073 - [X86] Remove all of the patterns that use X86ISD::FAND/FXOR/FOR/FANDN except for the ones needed for SSE1. Anything SSE2 or above uses the integer ISD opcode.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 18 16:42:29 PST 2016
Author: ctopper
Date: Sun Dec 18 18:42:28 2016
New Revision: 290073
URL: http://llvm.org/viewvc/llvm-project?rev=290073&view=rev
Log:
[X86] Remove all of the patterns that use X86ISD::FAND/FXOR/FOR/FANDN except for the ones needed for SSE1. Anything SSE2 or above uses the integer ISD opcode.
This removes 11721 bytes from the DAG isel table or 2.2%
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=290073&r1=290072&r2=290073&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Dec 18 18:42:28 2016
@@ -4383,31 +4383,33 @@ defm VMAXCSDZ : avx512_comutable_binop_s
SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
-multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable> {
- let ExeDomain = _.ExeDomain in {
+ let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
IsCommutable>, EVEX_4V;
- defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
- EVEX_4V;
- defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
- (OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)))),
- itins.rm>, EVEX_4V, EVEX_B;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
+ EVEX_4V;
+ defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))),
+ itins.rm>, EVEX_4V, EVEX_B;
+ }
}
}
-multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -4418,7 +4420,7 @@ multiclass avx512_fp_round_packed<bits<8
}
-multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -4428,7 +4430,7 @@ multiclass avx512_fp_sae_packed<bits<8>
EVEX_4V, EVEX_B;
}
-multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
Predicate prd, SizeItins itins,
bit IsCommutable = 0> {
let Predicates = [prd] in {
@@ -4493,13 +4495,13 @@ let isCodeGenOnly = 1 in {
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
SSE_ALU_ITINS_P, 1>;
}
-defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, HasDQI,
+defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
-defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, HasDQI,
+defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
SSE_ALU_ITINS_P, 0>;
-defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, HasDQI,
+defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
-defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI,
+defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
// Patterns catch floating point selects with bitcasted integer logic ops.
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=290073&r1=290072&r2=290073&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Dec 18 18:42:28 2016
@@ -2859,80 +2859,6 @@ let Predicates = [HasAVX1Only] in {
}
let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
- def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
- (VANDPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
- (VORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
- (VXORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
- (VANDNPSrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(X86fand VR128:$src1, (loadv4f32 addr:$src2)),
- (VANDPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86for VR128:$src1, (loadv4f32 addr:$src2)),
- (VORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR128:$src1, (loadv4f32 addr:$src2)),
- (VXORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR128:$src1, (loadv4f32 addr:$src2)),
- (VANDNPSrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)),
- (VANDPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)),
- (VORPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)),
- (VXORPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)),
- (VANDNPDrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(X86fand VR128:$src1, (loadv2f64 addr:$src2)),
- (VANDPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86for VR128:$src1, (loadv2f64 addr:$src2)),
- (VORPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR128:$src1, (loadv2f64 addr:$src2)),
- (VXORPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR128:$src1, (loadv2f64 addr:$src2)),
- (VANDNPDrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v8f32 (X86fand VR256:$src1, VR256:$src2)),
- (VANDPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8f32 (X86for VR256:$src1, VR256:$src2)),
- (VORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8f32 (X86fxor VR256:$src1, VR256:$src2)),
- (VXORPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8f32 (X86fandn VR256:$src1, VR256:$src2)),
- (VANDNPSYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(X86fand VR256:$src1, (loadv8f32 addr:$src2)),
- (VANDPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86for VR256:$src1, (loadv8f32 addr:$src2)),
- (VORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR256:$src1, (loadv8f32 addr:$src2)),
- (VXORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR256:$src1, (loadv8f32 addr:$src2)),
- (VANDNPSYrm VR256:$src1, addr:$src2)>;
-
- def : Pat<(v4f64 (X86fand VR256:$src1, VR256:$src2)),
- (VANDPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86for VR256:$src1, VR256:$src2)),
- (VORPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86fxor VR256:$src1, VR256:$src2)),
- (VXORPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86fandn VR256:$src1, VR256:$src2)),
- (VANDNPDYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(X86fand VR256:$src1, (loadv4f64 addr:$src2)),
- (VANDPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86for VR256:$src1, (loadv4f64 addr:$src2)),
- (VORPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR256:$src1, (loadv4f64 addr:$src2)),
- (VXORPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR256:$src1, (loadv4f64 addr:$src2)),
- (VANDNPDYrm VR256:$src1, addr:$src2)>;
-}
-
-let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
// Use packed logical operations for scalar ops.
def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
(COPY_TO_REGCLASS (VANDPDrr
@@ -2970,24 +2896,6 @@ let Predicates = [HasAVX, NoVLX_Or_NoDQI
}
let Predicates = [UseSSE1] in {
- def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
- (ANDPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
- (ORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
- (XORPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
- (ANDNPSrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
- (ANDPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
- (ORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
- (XORPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
- (ANDNPSrm VR128:$src1, addr:$src2)>;
-
// Use packed logical operations for scalar ops.
def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
(COPY_TO_REGCLASS (ANDPSrr
@@ -3008,24 +2916,6 @@ let Predicates = [UseSSE1] in {
}
let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)),
- (ANDPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)),
- (ORPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)),
- (XORPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)),
- (ANDNPDrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(X86fand VR128:$src1, (memopv2f64 addr:$src2)),
- (ANDPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86for VR128:$src1, (memopv2f64 addr:$src2)),
- (ORPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fxor VR128:$src1, (memopv2f64 addr:$src2)),
- (XORPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86fandn VR128:$src1, (memopv2f64 addr:$src2)),
- (ANDNPDrm VR128:$src1, addr:$src2)>;
-
// Use packed logical operations for scalar ops.
def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
(COPY_TO_REGCLASS (ANDPDrr
@@ -3045,6 +2935,25 @@ let Predicates = [UseSSE2] in {
(COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
}
+// Patterns for packed operations when we don't have integer type available.
+def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
+ (ANDPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
+ (ORPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
+ (XORPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
+ (ANDNPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
+ (ANDPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
+ (ORPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
+ (XORPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
+ (ANDNPSrm VR128:$src1, addr:$src2)>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//
More information about the llvm-commits
mailing list