[llvm] r237455 - Mark SMIN/SMAX/UMIN/UMAX nodes as legal and add patterns for them.
James Molloy
james.molloy at arm.com
Fri May 15 09:15:58 PDT 2015
Author: jamesm
Date: Fri May 15 11:15:57 2015
New Revision: 237455
URL: http://llvm.org/viewvc/llvm-project?rev=237455&view=rev
Log:
Mark SMIN/SMAX/UMIN/UMAX nodes as legal and add patterns for them.
The new [SU]{MIN,MAX} SDNodes can be lowered directly to instructions for
most NEON integer datatypes - the exclusions being the vectors with 64-bit
elements (v1i64 and v2i64).
Added:
llvm/trunk/test/CodeGen/AArch64/minmax.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=237455&r1=237454&r2=237455&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Fri May 15 11:15:57 2015
@@ -679,6 +679,12 @@ void AArch64TargetLowering::addTypeForNE
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ // [SU]{MIN,MAX} are available for all integer NEON types apart from v1i64/v2i64.
+ if (!VT.isFloatingPoint() &&
+ VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=237455&r1=237454&r2=237455&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Fri May 15 11:15:57 2015
@@ -2809,6 +2809,55 @@ defm ORN : SIMDLogicalThreeVector<0, 0b1
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
+def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)),
+ (SMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)),
+ (SMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)),
+ (SMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)),
+ (SMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)),
+ (SMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)),
+ (SMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)),
+ (UMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)),
+ (UMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)),
+ (UMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)),
+ (UMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)),
+ (UMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)),
+ (UMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv4i32 V128:$Rn, V128:$Rm)>;
+
def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
Added: llvm/trunk/test/CodeGen/AArch64/minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/minmax.ll?rev=237455&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/minmax.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/minmax.ll Fri May 15 11:15:57 2015
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; CHECK-LABEL: t1
+; CHECK: smax
+define <4 x i32> @t1(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp sgt <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t2
+; CHECK: smin
+define <4 x i32> @t2(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp slt <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t3
+; CHECK: umax
+define <4 x i32> @t3(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp ugt <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t4
+; CHECK: umin
+define <8 x i8> @t4(<8 x i8> %a, <8 x i8> %b) {
+ %t1 = icmp ult <8 x i8> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b
+ ret <8 x i8> %t2
+}
+
+; CHECK-LABEL: t5
+; CHECK: smin
+define <4 x i16> @t5(<4 x i16> %a, <4 x i16> %b) {
+ %t1 = icmp sgt <4 x i16> %b, %a
+ %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b
+ ret <4 x i16> %t2
+}
+
+; CHECK-LABEL: t6
+; CHECK: smax
+define <2 x i32> @t6(<2 x i32> %a, <2 x i32> %b) {
+ %t1 = icmp slt <2 x i32> %b, %a
+ %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %t2
+}
+
+; CHECK-LABEL: t7
+; CHECK: umin
+define <16 x i8> @t7(<16 x i8> %a, <16 x i8> %b) {
+ %t1 = icmp ugt <16 x i8> %b, %a
+ %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %t2
+}
+
+; CHECK-LABEL: t8
+; CHECK: umax
+define <8 x i16> @t8(<8 x i16> %a, <8 x i16> %b) {
+ %t1 = icmp ult <8 x i16> %b, %a
+ %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %t2
+}
+
+; CHECK-LABEL: t9
+; CHECK: umin
+; CHECK: smax
+define <4 x i32> @t9(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %t1 = icmp ugt <4 x i32> %b, %a
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ %t3 = icmp sge <4 x i32> %t2, %c
+ %t4 = select <4 x i1> %t3, <4 x i32> %t2, <4 x i32> %c
+ ret <4 x i32> %t4
+}
+
+; CHECK-LABEL: t10
+; CHECK: smax
+; CHECK: smax
+define <8 x i32> @t10(<8 x i32> %a, <8 x i32> %b) {
+ %t1 = icmp sgt <8 x i32> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %t2
+}
+
+; CHECK-LABEL: t11
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
+define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) {
+ %t1 = icmp sle <16 x i32> %a, %b
+ %t2 = select <16 x i1> %t1, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %t2
+}
More information about the llvm-commits mailing list