[llvm] r230024 - canonicalize a v2f64 blendi of 2 registers
Sanjay Patel
spatel at rotateright.com
Fri Feb 20 08:55:27 PST 2015
Author: spatel
Date: Fri Feb 20 10:55:27 2015
New Revision: 230024
URL: http://llvm.org/viewvc/llvm-project?rev=230024&view=rev
Log:
canonicalize a v2f64 blendi of 2 registers
This canonicalization step saves us 3 pattern-matching possibilities * 4 math ops
(12 patterns in all) for scalar FP math that uses XMM registers. The backend can
re-commute the operands after instruction selection if that makes register
allocation better.
The tests in llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll cover this scenario already,
so there are no new tests with this patch.
Differential Revision: http://reviews.llvm.org/D7777
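
As a side note (not part of the commit), the swap is safe because a v2f64 blend of
(A, B) with mask 2 selects the same elements as a blend of (B, A) with mask 1. A
minimal standalone C++ sketch, assuming only the usual rule that a set immediate
bit selects the element from the second operand, checks that equivalence:

#include <array>
#include <cassert>

// Sketch only (not LLVM code): bit i of the immediate selects element i
// from the second operand B; a clear bit keeps element i from A.
static std::array<double, 2> blendi(std::array<double, 2> A,
                                    std::array<double, 2> B, unsigned Imm) {
  return {(Imm & 1) ? B[0] : A[0], (Imm & 2) ? B[1] : A[1]};
}

int main() {
  std::array<double, 2> X = {1.0, 2.0}, Y = {3.0, 4.0};
  // blendi(X, Y, 2) and blendi(Y, X, 1) both produce {X[0], Y[1]},
  // which is exactly the rewrite the new combine performs.
  assert(blendi(X, Y, 2) == blendi(Y, X, 1));
  return 0;
}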
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=230024&r1=230023&r2=230024&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Feb 20 10:55:27 2015
@@ -23024,6 +23024,32 @@ static SDValue PerformINSERTPSCombine(SD
LoadScalarToVector, N->getOperand(2));
}
+static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue V0 = N->getOperand(0);
+  SDValue V1 = N->getOperand(1);
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  // Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
+  // operands and changing the mask to 1. This saves us a bunch of
+  // pattern-matching possibilities related to scalar math ops in SSE/AVX.
+  // x86InstrInfo knows how to commute this back after instruction selection
+  // if it would help register allocation.
+
+  // TODO: If optimizing for size or a processor that doesn't suffer from
+  // partial register update stalls, this should be transformed into a MOVSD
+  // instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
+
+  if (VT == MVT::v2f64)
+    if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+      if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
+        SDValue NewMask = DAG.getConstant(1, MVT::i8);
+        return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
+      }
+
+  return SDValue();
+}
+
// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
@@ -23440,6 +23466,7 @@ SDValue X86TargetLowering::PerformDAGCom
return PerformINSERTPSCombine(N, DAG, Subtarget);
break;
}
+ case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG);
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DAG, Subtarget);
}
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=230024&r1=230023&r2=230024&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Feb 20 10:55:27 2015
@@ -3161,8 +3161,7 @@ let isCodeGenOnly = 1 in {
// addss %xmm1, %xmm0
// TODO: Some canonicalization in lowering would simplify the number of
-// patterns we have to try to match. In particular, the reversed order blends
-// seem unnecessary.
+// patterns we have to try to match.
multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [UseSSE1] in {
// extracted scalar math op with insert via movss
@@ -3263,16 +3262,9 @@ multiclass scalar_math_f64_patterns<SDNo
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
(Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
-
- // vector math op with insert via blend (reversed order)
- def : Pat<(v2f64 (X86Blendi
- (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
}
- // Repeat everything for AVX and add one more pattern
- // (the scalar + blend reversed order) for good measure.
+ // Repeat everything for AVX.
let Predicates = [HasAVX] in {
// extracted scalar math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
@@ -3288,13 +3280,6 @@ multiclass scalar_math_f64_patterns<SDNo
(!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
- // extracted scalar math op with insert via blend (reversed order)
- def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector
- (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
- FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
- (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
- (COPY_TO_REGCLASS FR64:$src, VR128))>;
-
// vector math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
(Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
@@ -3304,12 +3289,6 @@ multiclass scalar_math_f64_patterns<SDNo
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
(Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
(!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
-
- // vector math op with insert via blend (reversed order)
- def : Pat<(v2f64 (X86Blendi
- (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)),
- (v2f64 VR128:$dst), (i8 2))),
- (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
}
}
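
For context (not taken from the LLVM tree), a small hedged C++ intrinsics sketch
shows the source-level idiom whose DAG takes the canonical blend shape that the
remaining patterns match; compiled with SSE4.1 enabled, it should typically
select to a single addsd/vaddsd rather than addpd plus blendpd:

#include <immintrin.h>

// Hypothetical example: a vector math op whose result is merged into the
// low lane of 'a' via a blend with immediate 1 -- the canonical
// (X86Blendi $dst, (Op $dst, $src), (i8 1)) shape matched above.
__m128d add_low_lane(__m128d a, __m128d b) {
  __m128d sum = _mm_add_pd(a, b);   // add both lanes
  return _mm_blend_pd(a, sum, 1);   // keep a[1], take sum[0]
}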