[llvm] r237455 - Mark SMIN/SMAX/UMIN/UMAX nodes as legal and add patterns for them.

Fri May 15 09:15:58 PDT 2015

Author: jamesm
Date: Fri May 15 11:15:57 2015
New Revision: 237455

URL: http://llvm.org/viewvc/llvm-project?rev=237455&view=rev
Log:
Mark SMIN/SMAX/UMIN/UMAX nodes as legal and add patterns for them.

The new [SU]{MIN,MAX} SDNodes can be lowered directly to instructions for
most integer NEON datatypes - the big exclusion being the vectors with
64-bit elements (v2i64, and likewise v1i64).

Added:
    llvm/trunk/test/CodeGen/AArch64/minmax.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=237455&r1=237454&r2=237455&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Fri May 15 11:15:57 2015
@@ -679,6 +679,12 @@ void AArch64TargetLowering::addTypeForNE
   setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
 
+  // [SU][MIN|MAX] are available for all NEON types apart from i64.
+  if (!VT.isFloatingPoint() &&
+      VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
+    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+      setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+
   if (Subtarget->isLittleEndian()) {
     for (unsigned im = (unsigned)ISD::PRE_INC;
          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=237455&r1=237454&r2=237455&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Fri May 15 11:15:57 2015
@@ -2809,6 +2809,55 @@ defm ORN : SIMDLogicalThreeVector<0, 0b1
                                   BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
 defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
 
+def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)),
+          (SMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)),
+          (SMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)),
+          (SMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)),
+          (SMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)),
+          (SMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)),
+          (SMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)),
+          (SMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)),
+          (SMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)),
+          (SMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)),
+          (SMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)),
+          (SMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)),
+          (SMAXv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)),
+          (UMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)),
+          (UMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)),
+          (UMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)),
+          (UMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)),
+          (UMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)),
+          (UMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)),
+          (UMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)),
+          (UMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)),
+          (UMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)),
+          (UMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)),
+          (UMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)),
+          (UMAXv4i32 V128:$Rn, V128:$Rm)>;
+
 def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
           (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
 def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),

Added: llvm/trunk/test/CodeGen/AArch64/minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/minmax.ll?rev=237455&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/minmax.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/minmax.ll Fri May 15 11:15:57 2015
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; CHECK-LABEL: t1
+; CHECK: smax
+define <4 x i32> @t1(<4 x i32> %a, <4 x i32> %b) {
+  %t1 = icmp sgt <4 x i32> %a, %b
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t2
+; CHECK: smin
+define <4 x i32> @t2(<4 x i32> %a, <4 x i32> %b) {
+  %t1 = icmp slt <4 x i32> %a, %b
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t3
+; CHECK: umax
+define <4 x i32> @t3(<4 x i32> %a, <4 x i32> %b) {
+  %t1 = icmp ugt <4 x i32> %a, %b
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t4
+; CHECK: umin
+define <8 x i8> @t4(<8 x i8> %a, <8 x i8> %b) {
+  %t1 = icmp ult <8 x i8> %a, %b
+  %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b
+  ret <8 x i8> %t2
+}
+
+; CHECK-LABEL: t5
+; CHECK: smin
+define <4 x i16> @t5(<4 x i16> %a, <4 x i16> %b) {
+  %t1 = icmp sgt <4 x i16> %b, %a
+  %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b
+  ret <4 x i16> %t2
+}
+
+; CHECK-LABEL: t6
+; CHECK: smax
+define <2 x i32> @t6(<2 x i32> %a, <2 x i32> %b) {
+  %t1 = icmp slt <2 x i32> %b, %a
+  %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b
+  ret <2 x i32> %t2
+}
+
+; CHECK-LABEL: t7
+; CHECK: umin
+define <16 x i8> @t7(<16 x i8> %a, <16 x i8> %b) {
+  %t1 = icmp ugt <16 x i8> %b, %a
+  %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %t2
+}
+
+; CHECK-LABEL: t8
+; CHECK: umax
+define <8 x i16> @t8(<8 x i16> %a, <8 x i16> %b) {
+  %t1 = icmp ult <8 x i16> %b, %a
+  %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %t2
+}
+
+; CHECK-LABEL: t9
+; CHECK: umin
+; CHECK: smax
+define <4 x i32> @t9(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %t1 = icmp ugt <4 x i32> %b, %a
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  %t3 = icmp sge <4 x i32> %t2, %c
+  %t4 = select <4 x i1> %t3, <4 x i32> %t2, <4 x i32> %c
+  ret <4 x i32> %t4
+}
+
+; CHECK-LABEL: t10
+; CHECK: smax
+; CHECK: smax
+define <8 x i32> @t10(<8 x i32> %a, <8 x i32> %b) {
+  %t1 = icmp sgt <8 x i32> %a, %b
+  %t2 = select <8 x i1> %t1, <8 x i32> %a, <8 x i32> %b
+  ret <8 x i32> %t2
+}
+
+; CHECK-LABEL: t11
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
+define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) {
+  %t1 = icmp sle <16 x i32> %a, %b
+  %t2 = select <16 x i1> %t1, <16 x i32> %a, <16 x i32> %b
+  ret <16 x i32> %t2
+}