[llvm] r357703 - [x86] eliminate unnecessary broadcast of horizontal op
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 4 07:46:13 PDT 2019
Author: spatel
Date: Thu Apr 4 07:46:13 2019
New Revision: 357703
URL: http://llvm.org/viewvc/llvm-project?rev=357703&view=rev
Log:
[x86] eliminate unnecessary broadcast of horizontal op
This is another pattern that comes up if we scalarize FP ops more
aggressively: broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=357703&r1=357702&r2=357703&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Apr 4 07:46:13 2019
@@ -32790,10 +32790,19 @@ static SDValue combineShuffleOfConcatUnd
/// Eliminate a redundant shuffle of a horizontal math op.
static SDValue foldShuffleOfHorizOp(SDNode *N) {
unsigned Opcode = N->getOpcode();
- if (Opcode != X86ISD::MOVDDUP)
+ if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
return SDValue();
+ // For a broadcast, peek through an extract element of index 0 to find the
+ // horizontal op: broadcast (ext_vec_elt HOp, 0)
+ if (Opcode == X86ISD::VBROADCAST) {
+ SDValue SrcOp = N->getOperand(0);
+ if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ SrcOp.getValueType() == MVT::f64 && isNullConstant(SrcOp.getOperand(1)))
+ N = SrcOp.getNode();
+ }
+
SDValue HOp = N->getOperand(0);
if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD &&
HOp.getOpcode() != X86ISD::HSUB && HOp.getOpcode() != X86ISD::FHSUB)
@@ -32808,10 +32817,11 @@ static SDValue foldShuffleOfHorizOp(SDNo
return SDValue();
// When the operands of a horizontal math op are identical, the low half of
- // the result is the same as the high half. If the shuffle is also replicating
- // low and high halves, we don't need the shuffle.
- if (Opcode == X86ISD::MOVDDUP) {
+ // the result is the same as the high half. If a target shuffle is also
+ // replicating low and high halves, we don't need the shuffle.
+ if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
// movddup (hadd X, X) --> hadd X, X
+ // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
assert((HOp.getValueType() == MVT::v2f64 ||
HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
return HOp;
Modified: llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll?rev=357703&r1=357702&r2=357703&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll (original)
+++ llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll Thu Apr 4 07:46:13 2019
@@ -349,7 +349,6 @@ define <2 x double> @hadd_v2f64_scalar_s
; AVX2_FAST-LABEL: hadd_v2f64_scalar_splat:
; AVX2_FAST: # %bb.0:
; AVX2_FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; AVX2_FAST-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX2_FAST-NEXT: retq
%a0 = extractelement <2 x double> %a, i32 0
%a1 = extractelement <2 x double> %a, i32 1
More information about the llvm-commits mailing list