[llvm] r260024 - AVX512: VPBROADCASTB/W/D/Q from GPR intrinsics implementation.
Igor Breger via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 7 00:30:51 PST 2016
Author: ibreger
Date: Sun Feb 7 02:30:50 2016
New Revision: 260024
URL: http://llvm.org/viewvc/llvm-project?rev=260024&view=rev
Log:
AVX512: VPBROADCASTB/W/D/Q from GPR intrinsics implementation.
Differential Revision: http://reviews.llvm.org/D16813
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx-isa-check.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sun Feb 7 02:30:50 2016
@@ -2944,18 +2944,62 @@ let TargetPrefix = "x86" in { // All in
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx512_mask_pbroadcast_b_gpr_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb128_gpr_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pbroadcast_b_gpr_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb256_gpr_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pbroadcast_b_gpr_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb512_gpr_mask">,
+ Intrinsic<[llvm_v64i8_ty],
+ [llvm_i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pbroadcast_w_gpr_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw128_gpr_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pbroadcast_w_gpr_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw256_gpr_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pbroadcast_w_gpr_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw512_gpr_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pbroadcast_d_gpr_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd128_gpr_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pbroadcast_d_gpr_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd256_gpr_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pbroadcast_d_gpr_512 :
- GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_v16i32_ty,
- llvm_i16_ty], [IntrNoMem]>;
+ GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pbroadcast_q_gpr_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq128_gpr_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pbroadcast_q_gpr_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq256_gpr_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pbroadcast_q_gpr_512 :
- GCCBuiltin<"__builtin_ia32_pbroadcastq512_gpr_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
+ GCCBuiltin<"__builtin_ia32_pbroadcastq512_gpr_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
def int_x86_avx512_mask_pbroadcast_q_mem_512 :
- GCCBuiltin<"__builtin_ia32_pbroadcastq512_mem_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty, llvm_v8i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
+ GCCBuiltin<"__builtin_ia32_pbroadcastq512_mem_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector permutation
@@ -6119,11 +6163,6 @@ let TargetPrefix = "x86" in { // All in
Intrinsic<[llvm_v8i64_ty],
[llvm_v4i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_pbroadcastd_i32_512 :
- Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_pbroadcastq_i64_512 :
- Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>;
def int_x86_avx512_broadcastmw_512 :
GCCBuiltin<"__builtin_ia32_broadcastmw512">,
Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>;
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Feb 7 02:30:50 2016
@@ -913,9 +913,10 @@ def : Pat<(int_x86_avx512_vbroadcast_sd_
multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
RegisterClass SrcRC> {
- defm r : AVX512_maskable_in_asm<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins SrcRC:$src), "vpbroadcast"##_.Suffix,
- "$src", "$src", []>, T8PD, EVEX;
+ defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins SrcRC:$src),
+ "vpbroadcast"##_.Suffix, "$src", "$src",
+ (_.VT (X86VBroadcast SrcRC:$src))>, T8PD, EVEX;
}
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
@@ -928,10 +929,18 @@ multiclass avx512_int_broadcast_reg_vl<b
}
}
-defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, GR32,
+let isCodeGenOnly = 1 in {
+defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, GR8,
HasBWI>;
-defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, GR32,
+defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, GR16,
HasBWI>;
+}
+let isAsmParserOnly = 1 in {
+ defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
+ GR32, HasBWI>;
+ defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
+ GR32, HasBWI>;
+}
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, GR32,
HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, GR64,
@@ -939,27 +948,9 @@ defm VPBROADCASTQr : avx512_int_broadcas
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
(VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
-
def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
(VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
-def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
- (VPBROADCASTDrZr GR32:$src)>;
-def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
- (VPBROADCASTQrZr GR64:$src)>;
-
-def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
- (VPBROADCASTDrZr GR32:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
- (VPBROADCASTQrZr GR64:$src)>;
-
-def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
- (v16i32 immAllZerosV), (i16 GR16:$mask))),
- (VPBROADCASTDrZrkz (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
- (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
- (VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
-
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Feb 7 02:30:50 2016
@@ -8410,50 +8410,53 @@ let Predicates = [HasAVX2] in {
(VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src),
sub_xmm)))>;
- // Provide fallback in case the load node that is used in the patterns above
- // is used by additional users, which prevents the pattern selection.
+// Provide fallback in case the load node that is used in the patterns above
+// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
- def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
- (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
- (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
-
- def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
- def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
- def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
-
- def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
- (VPBROADCASTBrr (COPY_TO_REGCLASS
- (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
- VR128))>;
- def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
- (VPBROADCASTBYrr (COPY_TO_REGCLASS
- (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
- VR128))>;
-
- def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
- (VPBROADCASTWrr (COPY_TO_REGCLASS
- (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
- VR128))>;
- def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
- (VPBROADCASTWYrr (COPY_TO_REGCLASS
- (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
- VR128))>;
-
- // The patterns for VPBROADCASTD are not needed because they would match
- // the exact same thing as VBROADCASTSS patterns.
-
- def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
- (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
- // The v4i64 pattern is not needed because VBROADCASTSDYrr already match.
+ def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
+ (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
}
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI], AddedComplexity = 20 in {
+ def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
+ (VPBROADCASTBrr (COPY_TO_REGCLASS
+ (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
+ VR128))>;
+ def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
+ (VPBROADCASTBYrr (COPY_TO_REGCLASS
+ (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
+ VR128))>;
+
+ def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
+ (VPBROADCASTWrr (COPY_TO_REGCLASS
+ (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
+ VR128))>;
+ def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
+ (VPBROADCASTWYrr (COPY_TO_REGCLASS
+ (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
+ VR128))>;
+}
+let Predicates = [HasAVX2, NoVLX], AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
+ (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
+ def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
+ (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
+ def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
+ (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+
+ // The patterns for VPBROADCASTD are not needed because they would match
+ // the exact same thing as VBROADCASTSS patterns.
+
+ def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
+ (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ // The v4i64 pattern is not needed because VBROADCASTSDYrr already match.
+}
+
// AVX1 broadcast patterns
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
@@ -8464,10 +8467,9 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32
(VBROADCASTSSrm addr:$src)>;
}
-let Predicates = [HasAVX] in {
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
- let AddedComplexity = 20 in {
+let Predicates = [HasAVX], AddedComplexity = 20 in {
// 128bit broadcasts:
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
(VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
@@ -8480,6 +8482,11 @@ let Predicates = [HasAVX] in {
(VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
(VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
+ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
+ (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
+}
+
+let Predicates = [HasAVX, NoVLX], AddedComplexity = 20 in {
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
(VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
@@ -8490,12 +8497,9 @@ let Predicates = [HasAVX] in {
(VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
- }
-
- def : Pat<(v2f64 (X86VBroadcast f64:$src)),
- (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
def : Pat<(v2i64 (X86VBroadcast i64:$src)),
- (VMOVDDUPrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ (VMOVDDUPrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
}
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sun Feb 7 02:30:50 2016
@@ -1037,6 +1037,30 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_mask_pavg_w_128, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(avx512_mask_pavg_w_256, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(avx512_mask_pavg_w_512, INTR_TYPE_2OP_MASK, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_b_gpr_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_b_gpr_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_b_gpr_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_d_gpr_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_d_gpr_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_d_gpr_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_q_gpr_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_q_gpr_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_q_gpr_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_w_gpr_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_w_gpr_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pbroadcast_w_gpr_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
Modified: llvm/trunk/test/CodeGen/X86/avx-isa-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-isa-check.ll?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-isa-check.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-isa-check.ll Sun Feb 7 02:30:50 2016
@@ -1,5 +1,6 @@
; check AVX2 instructions that are disabled in case avx512VL/avx512BW present
-
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=corei7-avx -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
@@ -576,6 +577,78 @@ entry:
ret <8 x i16> %C
}
+define <32 x i8> @_broadcast32xi8(i8 %a) {
+ %b = insertelement <32 x i8> undef, i8 %a, i32 0
+ %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+ ret <32 x i8> %c
+}
+
+define <16 x i8> @_broadcast16xi8(i8 %a) {
+ %b = insertelement <16 x i8> undef, i8 %a, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %c
+}
+
+define <16 x i16> @_broadcast16xi16(i16 %a) {
+ %b = insertelement <16 x i16> undef, i16 %a, i32 0
+ %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+ ret <16 x i16> %c
+}
+
+define <8 x i16> @_broadcast8xi16(i16 %a) {
+ %b = insertelement <8 x i16> undef, i16 %a, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %c
+}
+
+define <8 x i32> @_broadcast8xi32(i32 %a) {
+ %b = insertelement <8 x i32> undef, i32 %a, i32 0
+ %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+ ret <8 x i32> %c
+}
+
+define <4 x i32> @_broadcast4xi32(i32 %a) {
+ %b = insertelement <4 x i32> undef, i32 %a, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %c
+}
+
+define <4 x i64> @_broadcast4xi64(i64 %a) {
+ %b = insertelement <4 x i64> undef, i64 %a, i64 0
+ %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
+ ret <4 x i64> %c
+}
+
+define <2 x i64> @_broadcast2xi64(i64 %a) {
+ %b = insertelement <2 x i64> undef, i64 %a, i64 0
+ %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %c
+}
+
+define <8 x float> @_broadcast8xfloat(float %a) {
+ %b = insertelement <8 x float> undef, float %a, i32 0
+ %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
+ ret <8 x float> %c
+}
+
+define <4 x float> @_broadcast4xfloat(float %a) {
+ %b = insertelement <4 x float> undef, float %a, i32 0
+ %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %c
+}
+
+define <4 x double> @_broadcast4xdouble(double %a) {
+ %b = insertelement <4 x double> undef, double %a, i32 0
+ %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
+ ret <4 x double> %c
+}
+
+define <2 x double> @_broadcast2xdouble(double %a) {
+ %b = insertelement <2 x double> undef, double %a, i32 0
+ %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %c
+}
+
define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
%x = fmul <4 x float> %a0, %a1
%res = fsub <4 x float> %x, %a2
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sun Feb 7 02:30:50 2016
@@ -574,16 +574,6 @@ define <16 x i32>@test_int_x86_avx512_pb
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)
-define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
-; CHECK-LABEL: test_x86_pbroadcastd_i32_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastd %edi, %zmm0
-; CHECK-NEXT: retq
- %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
-}
-declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
-
define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
; CHECK: ## BB#0:
@@ -603,16 +593,6 @@ define <8 x i64>@test_int_x86_avx512_pbr
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)
-define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
-; CHECK-LABEL: test_x86_pbroadcastq_i64_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
-; CHECK-NEXT: retq
- %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
-}
-declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
-
define <16 x i32> @test_conflict_d(<16 x i32> %a) {
; CHECK-LABEL: test_conflict_d:
; CHECK: ## BB#0:
@@ -7357,6 +7337,45 @@ define i8 at test_int_x86_avx512_ptestnm_q_
ret i8 %res2
}
+define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
+; CHECK-NEXT: vpbroadcastd %edi, %zmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %edi, %zmm2
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res2, %res3
+ ret <16 x i32> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16)
+
+define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm2
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
+}
+declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8)
+
declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Sun Feb 7 02:30:50 2016
@@ -3516,3 +3516,67 @@ define i32 at test_int_x86_avx512_ptestnm_w
%res2 = add i32 %res, %res1
ret i32 %res2
}
+
+declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1}
+; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z}
+; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movb {{[0-9]+}}(%esp), %al
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z}
+; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1}
+; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
+ %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
+ %res3 = add <64 x i8> %res, %res1
+ %res4 = add <64 x i8> %res2, %res3
+ ret <64 x i8> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1}
+; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z}
+; AVX512BW-NEXT: vpbroadcastw %di, %zmm2
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movw {{[0-9]+}}(%esp), %ax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1}
+; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z}
+; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res2, %res3
+ ret <32 x i16> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Sun Feb 7 02:30:50 2016
@@ -5450,3 +5450,82 @@ define i16 at test_int_x86_avx512_ptestnm_w
ret i16 %res2
}
+declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1}
+; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastb %dil, %ymm2
+; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
+ %res3 = add <32 x i8> %res, %res1
+ %res4 = add <32 x i8> %res2, %res3
+ ret <32 x i8> %res4
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1}
+; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastb %dil, %xmm2
+; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i8> %res, %res1
+ %res4 = add <16 x i8> %res2, %res3
+ ret <16 x i8> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1}
+; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastw %di, %ymm2
+; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res2, %res3
+ ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1}
+; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastw %di, %xmm2
+; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res2, %res3
+ ret <8 x i16> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Sun Feb 7 02:30:50 2016
@@ -8236,3 +8236,82 @@ define i8 at test_int_x86_avx512_ptestnm_q_
ret i8 %res2
}
+declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1}
+; CHECK-NEXT: vpbroadcastd %edi, %ymm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %edi, %ymm2
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res2, %res3
+ ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1}
+; CHECK-NEXT: vpbroadcastd %edi, %xmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %edi, %xmm2
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res2, %res3
+ ret <4 x i32> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %rdi, %ymm2
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res2, %res3
+ ret <4 x i64> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm1 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %rdi, %xmm2
+; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res2, %res3
+ ret <2 x i64> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Sun Feb 7 02:30:50 2016
@@ -637,8 +637,7 @@ define <16 x float> @test14(float* %base
; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT: vpbroadcastq %xmm0, %zmm0
-; SKX-NEXT: vmovd %esi, %xmm1
-; SKX-NEXT: vpbroadcastd %xmm1, %ymm1
+; SKX-NEXT: vpbroadcastd %esi, %ymm1
; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
; SKX-NEXT: vpsllq $2, %zmm1, %zmm1
; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=260024&r1=260023&r2=260024&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Sun Feb 7 02:30:50 2016
@@ -74,13 +74,13 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0
; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,1,0,3,7,7,0]
-; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: movq {{.*}}(%rip), %rax
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -105,14 +105,14 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k2} {z}
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
-; AVX512F-NEXT: vpermt2d %zmm1, %zmm3, %zmm2
-; AVX512F-NEXT: vpslld $31, %zmm2, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: movl {{.*}}(%rip), %eax
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermt2d %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vpslld $31, %zmm1, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -163,13 +163,13 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u
; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} {z}
-; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm1
-; AVX512F-NEXT: vpbroadcastq %xmm1, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: movq {{.*}}(%rip), %rax
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: retq
;
@@ -310,12 +310,12 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: movb $51, %al
; AVX512F-NEXT: kmovw %eax, %k2
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2} {z}
-; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: movq {{.*}}(%rip), %rax
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpbroadcastq %rax, %zmm1 {%k1} {z}
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm0
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: retq
More information about the llvm-commits
mailing list