[llvm] r235875 - AVX-512: Extend/Truncate operations for SKX,
Elena Demikhovsky
elena.demikhovsky at intel.com
Mon Apr 27 05:57:59 PDT 2015
Author: delena
Date: Mon Apr 27 07:57:59 2015
New Revision: 235875
URL: http://llvm.org/viewvc/llvm-project?rev=235875&view=rev
Log:
AVX-512: Extend/Truncate operations for SKX,
SETCC for bit-vectors
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=235875&r1=235874&r2=235875&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Apr 27 07:57:59 2015
@@ -1303,6 +1303,10 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
+ if (Subtarget->hasDQI()) {
+ setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+ }
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
@@ -1313,7 +1317,10 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
-
+ if (Subtarget->hasDQI()) {
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
+ }
setOperationAction(ISD::FFLOOR, MVT::v16f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v8f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v16f32, Legal);
@@ -12055,6 +12062,23 @@ SDValue X86TargetLowering::LowerTRUNCATE
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
+ // move vector to mask - truncate solution for SKX
+ if (VT.getVectorElementType() == MVT::i1) {
+ if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
+ Subtarget->hasBWI())
+ return Op; // legal, will go to VPMOVB2M, VPMOVW2M
+ if ((InVT.is256BitVector() || InVT.is128BitVector())
+ && InVT.getScalarSizeInBits() <= 16 &&
+ Subtarget->hasBWI() && Subtarget->hasVLX())
+ return Op; // legal, will go to VPMOVB2M, VPMOVW2M
+ if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
+ Subtarget->hasDQI())
+ return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
+ if ((InVT.is256BitVector() || InVT.is128BitVector())
+ && InVT.getScalarSizeInBits() >= 32 &&
+ Subtarget->hasDQI() && Subtarget->hasVLX())
+ return Op; // legal, will go to VPMOVB2M, VPMOVQ2M
+ }
if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
if (VT.getVectorElementType().getSizeInBits() >=8)
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
@@ -13001,6 +13025,49 @@ static SDValue Lower256IntVSETCC(SDValue
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
+static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ MVT VT = Op.getSimpleValueType();
+ SDLoc dl(Op);
+
+ assert(Op0.getValueType().getVectorElementType() == MVT::i1 &&
+ "Unexpected type for boolean compare operation");
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0,
+ DAG.getConstant(-1, VT));
+ SDValue NotOp1 = DAG.getNode(ISD::XOR, dl, VT, Op1,
+ DAG.getConstant(-1, VT));
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETNE:
+ // (x != y) -> ~(x ^ y)
+ return DAG.getNode(ISD::XOR, dl, VT,
+ DAG.getNode(ISD::XOR, dl, VT, Op0, Op1),
+ DAG.getConstant(-1, VT));
+ case ISD::SETEQ:
+ // (x == y) -> (x ^ y)
+ return DAG.getNode(ISD::XOR, dl, VT, Op0, Op1);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ // (x > y) -> (x & ~y)
+ return DAG.getNode(ISD::AND, dl, VT, Op0, NotOp1);
+ case ISD::SETULT:
+ case ISD::SETLT:
+ // (x < y) -> (~x & y)
+ return DAG.getNode(ISD::AND, dl, VT, NotOp0, Op1);
+ case ISD::SETULE:
+ case ISD::SETLE:
+ // (x <= y) -> (~x | y)
+ return DAG.getNode(ISD::OR, dl, VT, NotOp0, Op1);
+ case ISD::SETUGE:
+ case ISD::SETGE:
+ // (x >=y) -> (x | ~y)
+ return DAG.getNode(ISD::OR, dl, VT, Op0, NotOp1);
+ }
+}
+
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDValue Op0 = Op.getOperand(0);
@@ -13119,8 +13186,11 @@ static SDValue LowerVSETCC(SDValue Op, c
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
- bool MaskResult = (VT.getVectorElementType() == MVT::i1);
EVT OpVT = Op1.getValueType();
+ if (OpVT.getVectorElementType() == MVT::i1)
+ return LowerBoolVSETCC_AVX512(Op, DAG);
+
+ bool MaskResult = (VT.getVectorElementType() == MVT::i1);
if (Subtarget->hasAVX512()) {
if (Op1.getValueType().is512BitVector() ||
(Subtarget->hasBWI() && Subtarget->hasVLX()) ||
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=235875&r1=235874&r2=235875&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Apr 27 07:57:59 2015
@@ -1906,18 +1906,21 @@ def : Pat<(xor VK64:$src1, (v64i1 immAll
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
-
def : Pat<(not VK8:$src),
(COPY_TO_REGCLASS
(KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
}
+def : Pat<(xor VK4:$src1, (v4i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src1, VK16)), VK4)>;
+def : Pat<(xor VK2:$src1, (v2i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src1, VK16)), VK2)>;
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
- Predicate prd> {
- let Predicates = [prd] in
+ Predicate prd, bit IsCommutable> {
+ let Predicates = [prd], isCommutable = IsCommutable in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -1925,40 +1928,25 @@ multiclass avx512_mask_binop<bits<8> opc
}
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode> {
+ SDPatternOperator OpNode, bit IsCommutable> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
- HasDQI>, VEX_4V, VEX_L, PD;
+ HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- HasAVX512>, VEX_4V, VEX_L, PS;
+ HasAVX512, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
+ HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
+ HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
-let isCommutable = 1 in {
- defm KAND : avx512_mask_binop_all<0x41, "kand", and>;
- defm KOR : avx512_mask_binop_all<0x45, "kor", or>;
- defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
- defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>;
-}
-let isCommutable = 0 in
- defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
-
-def : Pat<(xor VK1:$src1, VK1:$src2),
- (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
-
-def : Pat<(or VK1:$src1, VK1:$src2),
- (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
-
-def : Pat<(and VK1:$src1, VK1:$src2),
- (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
+defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
+defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>;
+defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
+defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>;
multiclass avx512_mask_binop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -1975,13 +1963,28 @@ defm : avx512_mask_binop_int<"kor", "K
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor", "KXOR">;
-// With AVX-512, 8-bit mask is promoted to 16-bit mask.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
- let Predicates = [HasAVX512] in
- def : Pat<(OpNode VK8:$src1, VK8:$src2),
- (COPY_TO_REGCLASS
- (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
- (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
+ // With AVX512F, 8-bit mask is promoted to 16-bit mask,
+ // for the DQI set, this type is legal and KxxxB instruction is used
+ let Predicates = [NoDQI] in
+ def : Pat<(OpNode VK8:$src1, VK8:$src2),
+ (COPY_TO_REGCLASS
+ (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
+ (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
+
+ // All types smaller than 8 bits require conversion anyway
+ def : Pat<(OpNode VK1:$src1, VK1:$src2),
+ (COPY_TO_REGCLASS (Inst
+ (COPY_TO_REGCLASS VK1:$src1, VK16),
+ (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+ def : Pat<(OpNode VK2:$src1, VK2:$src2),
+ (COPY_TO_REGCLASS (Inst
+ (COPY_TO_REGCLASS VK2:$src1, VK16),
+ (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
+ def : Pat<(OpNode VK4:$src1, VK4:$src2),
+ (COPY_TO_REGCLASS (Inst
+ (COPY_TO_REGCLASS VK4:$src1, VK16),
+ (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
}
defm : avx512_binop_pat<and, KANDWrr>;
@@ -1990,6 +1993,32 @@ defm : avx512_binop_pat<or, KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor, KXORWrr>;
+def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)),
+ (KXNORWrr VK16:$src1, VK16:$src2)>;
+def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
+ (KXNORBrr VK8:$src1, VK8:$src2)>;
+def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)),
+ (KXNORDrr VK32:$src1, VK32:$src2)>;
+def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)),
+ (KXNORQrr VK64:$src1, VK64:$src2)>;
+
+let Predicates = [NoDQI] in
+def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK8:$src1, VK16),
+ (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
+
+def : Pat<(xor (xor VK4:$src1, VK4:$src2), (v4i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK4:$src1, VK16),
+ (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
+
+def : Pat<(xor (xor VK2:$src1, VK2:$src2), (v2i1 immAllOnesV)),
+ (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK2:$src1, VK16),
+ (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
+
+def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
+ (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
+ (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+
// Mask unpacking
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
RegisterClass KRC> {
@@ -2085,6 +2114,8 @@ multiclass avx512_mask_setop<RegisterCla
multiclass avx512_mask_setop_w<PatFrag Val> {
defm B : avx512_mask_setop<VK8, v8i1, Val>;
defm W : avx512_mask_setop<VK16, v16i1, Val>;
+ defm D : avx512_mask_setop<VK32, v32i1, Val>;
+ defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
@@ -2094,6 +2125,8 @@ defm KSET1 : avx512_mask_setop_w<immAllO
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
+ def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
+ def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=235875&r1=235874&r2=235875&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Mon Apr 27 07:57:59 2015
@@ -131,4 +131,30 @@ entry:
%mask = load <8 x i1>, <8 x i1>* %maskPtr
%mask_convert = bitcast <8 x i1> %mask to i8
ret i8 %mask_convert
+}
+
+; SKX-LABEL: test4
+; SKX: vpcmpgt
+; SKX: knot
+; SKX: vpcmpgt
+; SKX: vpmovm2d
+define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
+ %x_gt_y = icmp sgt <4 x i64> %x, %y
+ %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
+ %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
+ %resse = sext <4 x i1>%res to <4 x i32>
+ ret <4 x i32> %resse
+}
+
+; SKX-LABEL: test5
+; SKX: vpcmpgt
+; SKX: knot
+; SKX: vpcmpgt
+; SKX: vpmovm2q
+define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
+ %x_gt_y = icmp slt <2 x i64> %x, %y
+ %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
+ %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
+ %resse = sext <2 x i1>%res to <2 x i64>
+ ret <2 x i64> %resse
}
\ No newline at end of file
Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll?rev=235875&r1=235874&r2=235875&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll Mon Apr 27 07:57:59 2015
@@ -90,6 +90,8 @@ define <8 x i64> @zext_8i1_to_8xi64(i8
; CHECK: vpandd
; CHECK: vptestmd
; CHECK: ret
+; SKX-LABEL: trunc_16i8_to_16i1
+; SKX: vpmovb2m %xmm
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
%mask_b = trunc <16 x i8>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
@@ -100,17 +102,34 @@ define i16 @trunc_16i8_to_16i1(<16 x i8>
; CHECK: vpandd
; CHECK: vptestmd
; CHECK: ret
+; SKX-LABEL: trunc_16i32_to_16i1
+; SKX: vpmovd2m %zmm
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
%mask_b = trunc <16 x i32>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
ret i16 %mask
}
+; SKX-LABEL: trunc_4i32_to_4i1
+; SKX: vpmovd2m %xmm
+; SKX: kandw
+; SKX: vpmovm2d
+define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
+ %mask_a = trunc <4 x i32>%a to <4 x i1>
+ %mask_b = trunc <4 x i32>%b to <4 x i1>
+ %a_and_b = and <4 x i1>%mask_a, %mask_b
+ %res = sext <4 x i1>%a_and_b to <4 x i32>
+ ret <4 x i32>%res
+}
+
; CHECK-LABEL: trunc_8i16_to_8i1
; CHECK: vpmovsxwq
; CHECK: vpandq LCP{{.*}}(%rip){1to8}
; CHECK: vptestmq
; CHECK: ret
+
+; SKX-LABEL: trunc_8i16_to_8i1
+; SKX: vpmovw2m %xmm
define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
%mask_b = trunc <8 x i16>%a to <8 x i1>
%mask = bitcast <8 x i1> %mask_b to i8
Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=235875&r1=235874&r2=235875&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Mon Apr 27 07:57:59 2015
@@ -374,3 +374,27 @@ define <8 x i64> @test27(<8 x i64> %x, i
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}
+
+; CHECK-LABEL: test28
+; CHECK: vpcmpgtq
+; CHECK: vpcmpgtq
+; CHECK: kxorw
+define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
+ %x_gt_y = icmp sgt <8 x i64> %x, %y
+ %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
+ %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
+ %resse = sext <8 x i1>%res to <8 x i32>
+ ret <8 x i32> %resse
+}
+
+; CHECK-LABEL: test29
+; CHECK: vpcmpgtd
+; CHECK: vpcmpgtd
+; CHECK: kxnorw
+define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
+ %x_gt_y = icmp sgt <16 x i32> %x, %y
+ %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
+ %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
+ %resse = sext <16 x i1>%res to <16 x i8>
+ ret <16 x i8> %resse
+}
\ No newline at end of file
More information about the llvm-commits
mailing list