[PATCH] D21356: [x86] add folds for x86 vector compare nodes (PR27924)

Tue Jun 14 15:01:37 PDT 2016

spatel created this revision.
spatel added reviewers: craig.topper, RKSimon, ab.
spatel added a subscriber: llvm-commits.
Herald added a subscriber: mcrosier.

Ideally, we can get rid of most x86 LLVM intrinsics by transforming them to IR, but it doesn't cost much to also have some simple folds in the backend, both as a stopgap while we're working on that and as a backstop.

This fixes:
https://llvm.org/bugs/show_bug.cgi?id=27924

I am also planning to work on the IR upgrades so that these kinds of problems are avoided earlier.

http://reviews.llvm.org/D21356

Files:
  lib/Target/X86/X86ISelLowering.cpp
  test/CodeGen/X86/vector-compare-combines.ll

Index: test/CodeGen/X86/vector-compare-combines.ll
===================================================================

--- test/CodeGen/X86/vector-compare-combines.ll
+++ test/CodeGen/X86/vector-compare-combines.ll
@@ -2,21 +2,19 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
 
-; FIXME: If we have SSE/AVX intrinsics in the code, we miss obvious combines
+; If we have SSE/AVX intrinsics in the code, we miss obvious combines
 ; unless we do them late on X86-specific nodes.
 
 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>)
 
 define <4 x i32> @PR27924_cmpeq(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: PR27924_cmpeq:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pmaxsd %xmm1, %xmm0
 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: PR27924_cmpeq:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
@@ -31,14 +29,12 @@
 define <4 x i32> @PR27924_cmpgt(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: PR27924_cmpgt:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pmaxsd %xmm1, %xmm0
-; SSE-NEXT:    pcmpgtd %xmm0, %xmm0
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: PR27924_cmpgt:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpgtd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
   %cmp = icmp sgt <4 x i32> %a, %b
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -30459,6 +30459,22 @@
                      Op0->getOperand(0), Op0->getOperand(1));
 }
 
+static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
+                                    const X86Subtarget &Subtarget) {
+  MVT VT = N->getSimpleValueType(0);
+  SDLoc DL(N);
+
+  if (N->getOperand(0) == N->getOperand(1)) {
+    if (N->getOpcode() == X86ISD::PCMPEQ)
+      return getOnesVector(VT, Subtarget, DAG, DL);
+    if (N->getOpcode() == X86ISD::PCMPGT)
+      return getZeroVector(VT, Subtarget, DAG, DL);
+  }
+
+  return SDValue();
+}
+
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -30539,6 +30555,8 @@
   case ISD::MSCATTER:       return combineGatherScatter(N, DAG);
   case X86ISD::LSUB:        return combineLockSub(N, DAG, Subtarget);
   case X86ISD::TESTM:       return PerformTESTM(N, DAG);
+  case X86ISD::PCMPEQ:
+  case X86ISD::PCMPGT:      return combineVectorCompare(N, DAG, Subtarget);
   }
 
   return SDValue();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D21356.60766.patch
Type: text/x-patch
Size: 2880 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160614/c12a4af3/attachment.bin>