[llvm] r359095 - [x86] make sure horizontal op and broadcast types match to simplify (PR41414)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 24 07:05:08 PDT 2019
Author: spatel
Date: Wed Apr 24 07:05:08 2019
New Revision: 359095
URL: http://llvm.org/viewvc/llvm-project?rev=359095&view=rev
Log:
[x86] make sure horizontal op and broadcast types match to simplify (PR41414)
If the types don't match, we can't just remove the shuffle.
There may be some other opportunity for optimization here,
but this should prevent the crash seen in:
https://bugs.llvm.org/show_bug.cgi?id=41414
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/haddsub-3.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=359095&r1=359094&r2=359095&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Apr 24 07:05:08 2019
@@ -32820,10 +32820,13 @@ static SDValue foldShuffleOfHorizOp(SDNo
// For a broadcast, peek through an extract element of index 0 to find the
// horizontal op: broadcast (ext_vec_elt HOp, 0)
+ EVT VT = N->getValueType(0);
if (Opcode == X86ISD::VBROADCAST) {
SDValue SrcOp = N->getOperand(0);
if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- SrcOp.getValueType() == MVT::f64 && isNullConstant(SrcOp.getOperand(1)))
+ SrcOp.getValueType() == MVT::f64 &&
+ SrcOp.getOperand(0).getValueType() == VT &&
+ isNullConstant(SrcOp.getOperand(1)))
N = SrcOp.getNode();
}
@@ -32847,7 +32850,8 @@ static SDValue foldShuffleOfHorizOp(SDNo
// movddup (hadd X, X) --> hadd X, X
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
assert((HOp.getValueType() == MVT::v2f64 ||
- HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
+ HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
+ "Unexpected type for h-op");
return HOp;
}
Modified: llvm/trunk/test/CodeGen/X86/haddsub-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/haddsub-3.ll?rev=359095&r1=359094&r2=359095&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/haddsub-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/haddsub-3.ll Wed Apr 24 07:05:08 2019
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
define float @pr26491(<4 x float> %a0) {
; SSE2-LABEL: pr26491:
@@ -37,3 +38,66 @@ define float @pr26491(<4 x float> %a0) {
%5 = fadd float %3, %4
ret float %5
}
+
+; When simplifying away a splat (broadcast), the hop type must match the shuffle type.
+
+define <4 x double> @PR41414(i64 %x, <4 x double> %y) {
+; SSE2-LABEL: PR41414:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movq %rdi, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; SSE2-NEXT: subpd {{.*}}(%rip), %xmm2
+; SSE2-NEXT: movapd %xmm2, %xmm3
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
+; SSE2-NEXT: addpd %xmm2, %xmm3
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0,0]
+; SSE2-NEXT: divpd %xmm3, %xmm1
+; SSE2-NEXT: divpd %xmm3, %xmm0
+; SSE2-NEXT: xorpd %xmm2, %xmm2
+; SSE2-NEXT: addpd %xmm2, %xmm0
+; SSE2-NEXT: addpd %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: PR41414:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movq %rdi, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; SSSE3-NEXT: subpd {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: haddpd %xmm2, %xmm2
+; SSSE3-NEXT: divpd %xmm2, %xmm1
+; SSSE3-NEXT: divpd %xmm2, %xmm0
+; SSSE3-NEXT: xorpd %xmm2, %xmm2
+; SSSE3-NEXT: addpd %xmm2, %xmm0
+; SSSE3-NEXT: addpd %xmm2, %xmm1
+; SSSE3-NEXT: retq
+;
+; AVX1-LABEL: PR41414:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovq %rdi, %xmm1
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vdivpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR41414:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq %rdi, %xmm1
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
+; AVX2-NEXT: vdivpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %conv = uitofp i64 %x to double
+ %t0 = insertelement <4 x double> undef, double %conv, i32 0
+ %t1 = shufflevector <4 x double> %t0, <4 x double> undef, <4 x i32> zeroinitializer
+ %t2 = fdiv <4 x double> %y, %t1
+ %t3 = fadd <4 x double> zeroinitializer, %t2
+ ret <4 x double> %t3
+}
More information about the llvm-commits mailing list