[llvm] 7721557 - [X86][SSE] isHorizontalBinOp - ensure we clear any unused source operands to improve HADD/SUB matching
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 15 09:24:45 PDT 2021
Author: Simon Pilgrim
Date: 2021-03-15T16:24:29Z
New Revision: 772155793bd0def6e9c12c063a5fb330c416adfa
URL: https://github.com/llvm/llvm-project/commit/772155793bd0def6e9c12c063a5fb330c416adfa
DIFF: https://github.com/llvm/llvm-project/commit/772155793bd0def6e9c12c063a5fb330c416adfa.diff
LOG: [X86][SSE] isHorizontalBinOp - ensure we clear any unused source operands to improve HADD/SUB matching
Our shuffle matching for HADD/SUB patterns wasn't clearing repeated ops in 'fake unary' style shuffle masks (unpack(x,x) etc.), preventing matching of add(fakeunary(),fakeunary()) style patterns.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/haddsub-3.ll
llvm/test/CodeGen/X86/haddsub-undef.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c767c2a9f90f..0cd08b4c52aa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45833,6 +45833,17 @@ static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
RMask.push_back(i);
}
+ // If we have a unary mask, ensure the other op is set to null.
+ if (isUndefOrInRange(LMask, 0, NumElts))
+ B = SDValue();
+ else if (isUndefOrInRange(LMask, NumElts, NumElts * 2))
+ A = SDValue();
+
+ if (isUndefOrInRange(RMask, 0, NumElts))
+ D = SDValue();
+ else if (isUndefOrInRange(RMask, NumElts, NumElts * 2))
+ C = SDValue();
+
// If A and B occur in reverse order in RHS, then canonicalize by commuting
// RHS operands and shuffle mask.
if (A != C) {
diff --git a/llvm/test/CodeGen/X86/haddsub-3.ll b/llvm/test/CodeGen/X86/haddsub-3.ll
index c83a7b73edf5..3be93b570f84 100644
--- a/llvm/test/CodeGen/X86/haddsub-3.ll
+++ b/llvm/test/CodeGen/X86/haddsub-3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefix=SSSE3-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3,SSSE3-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefixes=SSSE3,SSSE3-FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX1,AVX1-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX1,AVX1-FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
@@ -82,33 +82,18 @@ define <4 x double> @PR41414(i64 %x, <4 x double> %y) {
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: retq
;
-; SSSE3-SLOW-LABEL: PR41414:
-; SSSE3-SLOW: # %bb.0:
-; SSSE3-SLOW-NEXT: movq %rdi, %xmm2
-; SSSE3-SLOW-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
-; SSSE3-SLOW-NEXT: subpd {{.*}}(%rip), %xmm2
-; SSSE3-SLOW-NEXT: movddup {{.*#+}} xmm3 = xmm2[0,0]
-; SSSE3-SLOW-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
-; SSSE3-SLOW-NEXT: addpd %xmm3, %xmm2
-; SSSE3-SLOW-NEXT: divpd %xmm2, %xmm1
-; SSSE3-SLOW-NEXT: divpd %xmm2, %xmm0
-; SSSE3-SLOW-NEXT: xorpd %xmm2, %xmm2
-; SSSE3-SLOW-NEXT: addpd %xmm2, %xmm0
-; SSSE3-SLOW-NEXT: addpd %xmm2, %xmm1
-; SSSE3-SLOW-NEXT: retq
-;
-; SSSE3-FAST-LABEL: PR41414:
-; SSSE3-FAST: # %bb.0:
-; SSSE3-FAST-NEXT: movq %rdi, %xmm2
-; SSSE3-FAST-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
-; SSSE3-FAST-NEXT: subpd {{.*}}(%rip), %xmm2
-; SSSE3-FAST-NEXT: haddpd %xmm2, %xmm2
-; SSSE3-FAST-NEXT: divpd %xmm2, %xmm1
-; SSSE3-FAST-NEXT: divpd %xmm2, %xmm0
-; SSSE3-FAST-NEXT: xorpd %xmm2, %xmm2
-; SSSE3-FAST-NEXT: addpd %xmm2, %xmm0
-; SSSE3-FAST-NEXT: addpd %xmm2, %xmm1
-; SSSE3-FAST-NEXT: retq
+; SSSE3-LABEL: PR41414:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movq %rdi, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; SSSE3-NEXT: subpd {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: haddpd %xmm2, %xmm2
+; SSSE3-NEXT: divpd %xmm2, %xmm1
+; SSSE3-NEXT: divpd %xmm2, %xmm0
+; SSSE3-NEXT: xorpd %xmm2, %xmm2
+; SSSE3-NEXT: addpd %xmm2, %xmm0
+; SSSE3-NEXT: addpd %xmm2, %xmm1
+; SSSE3-NEXT: retq
;
; AVX1-LABEL: PR41414:
; AVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll
index 8a5e1cd66364..1be27fa3846d 100644
--- a/llvm/test/CodeGen/X86/haddsub-undef.ll
+++ b/llvm/test/CodeGen/X86/haddsub-undef.ll
@@ -467,17 +467,10 @@ define <2 x double> @add_pd_003_2(<2 x double> %x) {
}
define <2 x double> @add_pd_010(<2 x double> %x) {
-; SSE-SLOW-LABEL: add_pd_010:
-; SSE-SLOW: # %bb.0:
-; SSE-SLOW-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0]
-; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-SLOW-NEXT: addpd %xmm1, %xmm0
-; SSE-SLOW-NEXT: retq
-;
-; SSE-FAST-LABEL: add_pd_010:
-; SSE-FAST: # %bb.0:
-; SSE-FAST-NEXT: haddpd %xmm0, %xmm0
-; SSE-FAST-NEXT: retq
+; SSE-LABEL: add_pd_010:
+; SSE: # %bb.0:
+; SSE-NEXT: haddpd %xmm0, %xmm0
+; SSE-NEXT: retq
;
; AVX-SLOW-LABEL: add_pd_010:
; AVX-SLOW: # %bb.0:
More information about the llvm-commits
mailing list