[llvm] 1e113c0 - [AArch64][SVE] Fix umin/umax lowering to handle out of range imm.
Huihui Zhang via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 23 09:44:15 PDT 2020
Author: Huihui Zhang
Date: 2020-10-23T09:42:56-07:00
New Revision: 1e113c078a560ad71e838ab9cf6719a3d4f1ec6d
URL: https://github.com/llvm/llvm-project/commit/1e113c078a560ad71e838ab9cf6719a3d4f1ec6d
DIFF: https://github.com/llvm/llvm-project/commit/1e113c078a560ad71e838ab9cf6719a3d4f1ec6d.diff
LOG: [AArch64][SVE] Fix umin/umax lowering to handle out of range imm.
The immediate must be in the integer range [0, 255] for the umin/umax instructions.
Extend the pattern-matching helper SelectSVEArithImm() to take the value type's
bitwidth when checking whether the immediate value is in range.
Reviewed By: sdesmalen
Differential Revision: https://reviews.llvm.org/D89831
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 16820fea0a7d..07d9b7502a65 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -191,6 +191,11 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return SelectSVELogicalImm(N, VT, Imm);
}
+ template <MVT::SimpleValueType VT>
+ bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
+ return SelectSVEArithImm(N, VT, Imm);
+ }
+
template <unsigned Low, unsigned High, bool AllowSaturation = false>
bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
@@ -327,7 +332,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
bool AllowSaturation, SDValue &Imm);
- bool SelectSVEArithImm(SDValue N, SDValue &Imm);
+ bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
SDValue &Offset);
};
@@ -3128,13 +3133,28 @@ bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
return false;
}
-bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, SDValue &Imm) {
+bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
- uint64_t ImmVal = CNode->getSExtValue();
- SDLoc DL(N);
- ImmVal = ImmVal & 0xFF;
+ uint64_t ImmVal = CNode->getZExtValue();
+
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ ImmVal &= 0xFF;
+ break;
+ case MVT::i16:
+ ImmVal &= 0xFFFF;
+ break;
+ case MVT::i32:
+ ImmVal &= 0xFFFFFFFF;
+ break;
+ case MVT::i64:
+ break;
+ default:
+ llvm_unreachable("Unexpected type");
+ }
+
if (ImmVal < 256) {
- Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
return true;
}
}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a0eafa13d052..97cb64f22e9f 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -206,7 +206,10 @@ def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>",
def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
-def SVEArithUImmPat : ComplexPattern<i32, 1, "SelectSVEArithImm", []>;
+def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
+def SVEArithUImm16Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i16>", []>;
+def SVEArithUImm32Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i32>", []>;
+def SVEArithUImm64Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i64>", []>;
def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;
def SVEShiftImmL8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 7>", []>;
@@ -3981,10 +3984,10 @@ multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperato
def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>;
def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImm8Pat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImm16Pat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImm32Pat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImm64Pat, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
index 2e9620f113fa..a45c759db58e 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
@@ -51,6 +51,20 @@ define <vscale x 8 x i16> @smax_i16_neg(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %res
}
+define <vscale x 8 x i16> @smax_i16_out_of_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: smax_i16_out_of_range:
+; CHECK: mov w8, #257
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %cmp = icmp sgt <vscale x 8 x i16> %a, %splat
+ %res = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat
+ ret <vscale x 8 x i16> %res
+}
+
define <vscale x 4 x i32> @smax_i32_pos(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32_pos
; CHECK: smax z0.s, z0.s, #27
@@ -73,6 +87,20 @@ define <vscale x 4 x i32> @smax_i32_neg(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %res
}
+define <vscale x 4 x i32> @smax_i32_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: smax_i32_out_of_range:
+; CHECK: mov w8, #-129
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
+ %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %cmp = icmp sgt <vscale x 4 x i32> %a, %splat
+ %res = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat
+ ret <vscale x 4 x i32> %res
+}
+
define <vscale x 2 x i64> @smax_i64_pos(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64_pos
; CHECK: smax z0.d, z0.d, #27
@@ -95,6 +123,20 @@ define <vscale x 2 x i64> @smax_i64_neg(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %res
}
+define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: smax_i64_out_of_range:
+; CHECK: mov w8, #65535
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
+ %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %cmp = icmp sgt <vscale x 2 x i64> %a, %splat
+ %res = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat
+ ret <vscale x 2 x i64> %res
+}
+
;
; SMIN
;
@@ -142,6 +184,20 @@ define <vscale x 8 x i16> @smin_i16_neg(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %res
}
+define <vscale x 8 x i16> @smin_i16_out_of_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: smin_i16_out_of_range:
+; CHECK: mov w8, #257
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %cmp = icmp slt <vscale x 8 x i16> %a, %splat
+ %res = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat
+ ret <vscale x 8 x i16> %res
+}
+
define <vscale x 4 x i32> @smin_i32_pos(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32_pos
; CHECK: smin z0.s, z0.s, #27
@@ -164,6 +220,20 @@ define <vscale x 4 x i32> @smin_i32_neg(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %res
}
+define <vscale x 4 x i32> @smin_i32_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: smin_i32_out_of_range:
+; CHECK: mov w8, #-129
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
+ %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %cmp = icmp slt <vscale x 4 x i32> %a, %splat
+ %res = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat
+ ret <vscale x 4 x i32> %res
+}
+
define <vscale x 2 x i64> @smin_i64_pos(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64_pos
; CHECK: smin z0.d, z0.d, #27
@@ -186,6 +256,20 @@ define <vscale x 2 x i64> @smin_i64_neg(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %res
}
+define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: smin_i64_out_of_range:
+; CHECK: mov w8, #65535
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
+ %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %cmp = icmp slt <vscale x 2 x i64> %a, %splat
+ %res = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat
+ ret <vscale x 2 x i64> %res
+}
+
;
; UMAX
;
@@ -222,11 +306,14 @@ define <vscale x 8 x i16> @umax_i16_pos(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %res
}
-define <vscale x 8 x i16> @umax_i16_large(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: umax_i16_large
-; CHECK: umax z0.h, z0.h, #129
-; CHECK-NEXT: ret
- %elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
+define <vscale x 8 x i16> @umax_i16_out_of_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: umax_i16_out_of_range:
+; CHECK: mov w8, #257
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%cmp = icmp ugt <vscale x 8 x i16> %a, %splat
%res = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat
@@ -244,11 +331,14 @@ define <vscale x 4 x i32> @umax_i32_pos(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %res
}
-define <vscale x 4 x i32> @umax_i32_large(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: umax_i32_large
-; CHECK: umax z0.s, z0.s, #129
-; CHECK-NEXT: ret
- %elt = insertelement <vscale x 4 x i32> undef, i32 129, i32 0
+define <vscale x 4 x i32> @umax_i32_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: umax_i32_out_of_range:
+; CHECK: mov w8, #257
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp ugt <vscale x 4 x i32> %a, %splat
%res = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat
@@ -266,11 +356,14 @@ define <vscale x 2 x i64> @umax_i64_pos(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %res
}
-define <vscale x 2 x i64> @umax_i64_large(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: umax_i64_large
-; CHECK: umax z0.d, z0.d, #129
-; CHECK-NEXT: ret
- %elt = insertelement <vscale x 2 x i64> undef, i64 129, i32 0
+define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: umax_i64_out_of_range:
+; CHECK: mov w8, #65535
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%cmp = icmp ugt <vscale x 2 x i64> %a, %splat
%res = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat
@@ -313,11 +406,14 @@ define <vscale x 8 x i16> @umin_i16_pos(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %res
}
-define <vscale x 8 x i16> @umin_i16_large(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: umin_i16_large
-; CHECK: umin z0.h, z0.h, #129
-; CHECK-NEXT: ret
- %elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
+define <vscale x 8 x i16> @umin_i16_out_of_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: umin_i16_out_of_range:
+; CHECK: mov w8, #257
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%cmp = icmp ult <vscale x 8 x i16> %a, %splat
%res = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat
@@ -335,11 +431,14 @@ define <vscale x 4 x i32> @umin_i32_pos(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %res
}
-define <vscale x 4 x i32> @umin_i32_large(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: umin_i32_large
-; CHECK: umin z0.s, z0.s, #129
-; CHECK-NEXT: ret
- %elt = insertelement <vscale x 4 x i32> undef, i32 129, i32 0
+define <vscale x 4 x i32> @umin_i32_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: umin_i32_out_of_range:
+; CHECK: mov w8, #257
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp ult <vscale x 4 x i32> %a, %splat
%res = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat
@@ -357,11 +456,14 @@ define <vscale x 2 x i64> @umin_i64_pos(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %res
}
-define <vscale x 2 x i64> @umin_i64_large(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: umin_i64_large
-; CHECK: umin z0.d, z0.d, #129
-; CHECK-NEXT: ret
- %elt = insertelement <vscale x 2 x i64> undef, i64 129, i32 0
+define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: umin_i64_out_of_range:
+; CHECK: mov w8, #65535
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%cmp = icmp ult <vscale x 2 x i64> %a, %splat
%res = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index c70686d3447c..9bc41649b5ea 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -35,6 +35,23 @@ define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %out
}
+define <vscale x 8 x i16> @smax_i16_out_of_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: smax_i16_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #129
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %splat)
+ ret <vscale x 8 x i16> %out
+}
+
define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32:
; CHECK: // %bb.0:
@@ -49,6 +66,23 @@ define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %out
}
+define <vscale x 4 x i32> @smax_i32_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: smax_i32_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-129
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
+ %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %splat)
+ ret <vscale x 4 x i32> %out
+}
+
define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64:
; CHECK: // %bb.0:
@@ -63,6 +97,24 @@ define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %out
}
+define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: smax_i64_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #65535
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
+ %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %splat)
+ ret <vscale x 2 x i64> %out
+}
+
+
; SMIN
define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a) {
@@ -93,6 +145,23 @@ define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %out
}
+define <vscale x 8 x i16> @smin_i16_out_of_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: smin_i16_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-129
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %elt = insertelement <vscale x 8 x i16> undef, i16 -129, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %splat)
+ ret <vscale x 8 x i16> %out
+}
+
define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32:
; CHECK: // %bb.0:
@@ -107,6 +176,24 @@ define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %out
}
+define <vscale x 4 x i32> @smin_i32_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: smin_i32_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #257
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
+ %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %splat)
+ ret <vscale x 4 x i32> %out
+}
+
+
define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64:
; CHECK: // %bb.0:
@@ -121,6 +208,22 @@ define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %out
}
+define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: smin_i64_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.d, #-256 // =0xffffffffffffff00
+; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %elt = insertelement <vscale x 2 x i64> undef, i64 -256, i64 0
+ %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %splat)
+ ret <vscale x 2 x i64> %out
+}
+
; UMAX
define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a) {
@@ -151,6 +254,23 @@ define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %out
}
+define <vscale x 8 x i16> @umax_i16_out_of_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: umax_i16_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #257
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %splat)
+ ret <vscale x 8 x i16> %out
+}
+
define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32:
; CHECK: // %bb.0:
@@ -165,6 +285,23 @@ define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %out
}
+define <vscale x 4 x i32> @umax_i32_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: umax_i32_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #257
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
+ %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %splat)
+ ret <vscale x 4 x i32> %out
+}
+
define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64:
; CHECK: // %bb.0:
@@ -179,6 +316,23 @@ define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %out
}
+define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: umax_i64_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #65535
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
+ %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %splat)
+ ret <vscale x 2 x i64> %out
+}
+
; UMIN
define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a) {
@@ -209,6 +363,23 @@ define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %out
}
+define <vscale x 8 x i16> @umin_i16_out_of_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: umin_i16_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #257
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %splat)
+ ret <vscale x 8 x i16> %out
+}
+
define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32:
; CHECK: // %bb.0:
@@ -223,6 +394,23 @@ define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %out
}
+define <vscale x 4 x i32> @umin_i32_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: umin_i32_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #257
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
+ %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %splat)
+ ret <vscale x 4 x i32> %out
+}
+
define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64:
; CHECK: // %bb.0:
@@ -237,6 +425,23 @@ define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %out
}
+define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: umin_i64_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #65535
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
+ %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %splat)
+ ret <vscale x 2 x i64> %out
+}
+
; SQADD
define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
More information about the llvm-commits
mailing list