[llvm] r224516 - [AVX512] Enable FP arithmetic lowering for AVX512VL subsets.
Robert Khasanov
rob.khasanov at gmail.com
Thu Dec 18 04:28:22 PST 2014
Author: rkhasanov
Date: Thu Dec 18 06:28:22 2014
New Revision: 224516
URL: http://llvm.org/viewvc/llvm-project?rev=224516&view=rev
Log:
[AVX512] Enable FP arithmetic lowering for AVX512VL subsets.
Added RegOp2MemOpTable4 to transform 4th operand from register to memory in merge-masked versions of instructions.
Added lowering tests.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.h
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx512-arith.ll
llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=224516&r1=224515&r2=224516&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 18 06:28:22 2014
@@ -6213,7 +6213,8 @@ static SDValue LowerVectorBroadcast(SDVa
if (!IsLoad)
return SDValue();
- if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))
+ if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
+ (Subtarget->hasVLX() && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit so it doesn't match
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=224516&r1=224515&r2=224516&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Dec 18 06:28:22 2014
@@ -65,6 +65,7 @@ enum {
TB_INDEX_1 = 1,
TB_INDEX_2 = 2,
TB_INDEX_3 = 3,
+ TB_INDEX_4 = 4,
TB_INDEX_MASK = 0xf,
// Do not insert the reverse map (MemOp -> RegOp) into the table.
@@ -1337,6 +1338,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
+ // AVX-512{F,VL} foldable instructions
+ { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 },
+ { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
+ { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
+ { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
+
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
{ X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
@@ -1521,7 +1528,46 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE }
+ { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
+ // AVX-512 arithmetic instructions
+ { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
+ { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
+ { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
+ { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
+ { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
+ { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
+ { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+ { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
+ { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
+ { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
+ { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
+ { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
+ // AVX-512{F,VL} arithmetic instructions 256-bit
+ { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
+ { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
+ { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
+ { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
+ { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
+ { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
+ { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
+ { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
+ { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
+ { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
+ { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
+ { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
+ // AVX-512{F,VL} arithmetic instructions 128-bit
+ { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
+ { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
+ { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
+ { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
+ { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
+ { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
+ { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
+ { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
+ { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
+ { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
+ { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
+ { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1534,6 +1580,57 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
+ static const X86OpTblEntry OpTbl4[] = {
+ // AVX-512 foldable instructions
+ { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
+ { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
+ { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
+ { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
+ { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
+ { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
+ { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+ { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
+ { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
+ { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
+ { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
+ { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
+ // AVX-512{F,VL} foldable instructions 256-bit
+ { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
+ { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
+ { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
+ { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
+ { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
+ { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
+ { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
+ { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
+ { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
+ { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
+ { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
+ { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
+ // AVX-512{F,VL} foldable instructions 128-bit
+ { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
+ { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
+ { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
+ { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
+ { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
+ { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
+ { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
+ { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
+ { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
+ { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
+ { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
+ { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl4); i != e; ++i) {
+ unsigned RegOp = OpTbl4[i].RegOp;
+ unsigned MemOp = OpTbl4[i].MemOp;
+ unsigned Flags = OpTbl4[i].Flags;
+ AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
+ RegOp, MemOp,
+ // Index 4, folded load
+ Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
+ }
}
void
@@ -4249,6 +4346,8 @@ X86InstrInfo::foldMemoryOperandImpl(Mach
OpcodeTablePtr = &RegOp2MemOpTable2;
} else if (i == 3) {
OpcodeTablePtr = &RegOp2MemOpTable3;
+ } else if (i == 4) {
+ OpcodeTablePtr = &RegOp2MemOpTable4;
}
// If table selected...
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=224516&r1=224515&r2=224516&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Thu Dec 18 06:28:22 2014
@@ -152,6 +152,7 @@ class X86InstrInfo final : public X86Gen
RegOp2MemOpTableType RegOp2MemOpTable1;
RegOp2MemOpTableType RegOp2MemOpTable2;
RegOp2MemOpTableType RegOp2MemOpTable3;
+ RegOp2MemOpTableType RegOp2MemOpTable4;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=224516&r1=224515&r2=224516&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Dec 18 06:28:22 2014
@@ -3009,6 +3009,7 @@ let Predicates = [HasAVX1Only] in {
/// classes below
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, SizeItins itins> {
+ let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
SSEPackedSingle, itins.s, 0>, PS, VEX_4V;
@@ -3022,6 +3023,7 @@ multiclass basic_sse12_fp_binop_p<bits<8
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, VR256, v4f64, f256mem, loadv4f64,
SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L;
+ }
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=224516&r1=224515&r2=224516&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Thu Dec 18 06:28:22 2014
@@ -462,3 +462,193 @@ entry:
%d = and <8 x i64> %p1, %c
ret <8 x i64>%d
}
+
+; CHECK-LABEL: test_mask_vaddps
+; CHECK: vaddps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
+ <16 x float> %j, <16 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %x = fadd <16 x float> %i, %j
+ %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
+ ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulps
+; CHECK: vmulps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
+ <16 x float> %j, <16 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %x = fmul <16 x float> %i, %j
+ %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
+ ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vminps
+; CHECK: vminps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
+ <16 x float> %j, <16 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %cmp_res = fcmp olt <16 x float> %i, %j
+ %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
+ %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
+ ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vminpd
+; CHECK: vminpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
+ <8 x double> %j, <8 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+ %cmp_res = fcmp olt <8 x double> %i, %j
+ %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
+ %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
+ ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxps
+; CHECK: vmaxps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
+ <16 x float> %j, <16 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %cmp_res = fcmp ogt <16 x float> %i, %j
+ %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
+ %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
+ ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxpd
+; CHECK: vmaxpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
+ <8 x double> %j, <8 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+ %cmp_res = fcmp ogt <8 x double> %i, %j
+ %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
+ %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
+ ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vsubps
+; CHECK: vsubps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
+ <16 x float> %j, <16 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %x = fsub <16 x float> %i, %j
+ %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
+ ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vdivps
+; CHECK: vdivps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
+ <16 x float> %j, <16 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+ %x = fdiv <16 x float> %i, %j
+ %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
+ ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vaddpd
+; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
+ <8 x double> %j, <8 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+ %x = fadd <8 x double> %i, %j
+ %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
+ ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_vaddpd
+; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}}}
+; CHECK: ret
+define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
+ <8 x i64> %mask1) nounwind readnone {
+ %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+ %x = fadd <8 x double> %i, %j
+ %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
+ ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_mask_fold_vaddpd
+; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}.*}}
+; CHECK: ret
+define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
+ <8 x double>* %j, <8 x i64> %mask1)
+ nounwind {
+ %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+ %tmp = load <8 x double>* %j, align 8
+ %x = fadd <8 x double> %i, %tmp
+ %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
+ ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_fold_vaddpd
+; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}.*}}
+; CHECK: ret
+define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
+ <8 x i64> %mask1) nounwind {
+ %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+ %tmp = load <8 x double>* %j, align 8
+ %x = fadd <8 x double> %i, %tmp
+ %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
+ ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_broadcast_vaddpd
+; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
+; CHECK: ret
+define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
+ %tmp = load double* %j
+ %b = insertelement <8 x double> undef, double %tmp, i32 0
+ %c = shufflevector <8 x double> %b, <8 x double> undef,
+ <8 x i32> zeroinitializer
+ %x = fadd <8 x double> %c, %i
+ ret <8 x double> %x
+}
+
+; CHECK-LABEL: test_mask_broadcast_vaddpd
+; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]}.*}}
+; CHECK: ret
+define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
+ double* %j, <8 x i64> %mask1) nounwind {
+ %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+ %tmp = load double* %j
+ %b = insertelement <8 x double> undef, double %tmp, i32 0
+ %c = shufflevector <8 x double> %b, <8 x double> undef,
+ <8 x i32> zeroinitializer
+ %x = fadd <8 x double> %c, %i
+ %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
+ ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_broadcast_vaddpd
+; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
+ <8 x i64> %mask1) nounwind {
+ %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+ %tmp = load double* %j
+ %b = insertelement <8 x double> undef, double %tmp, i32 0
+ %c = shufflevector <8 x double> %b, <8 x double> undef,
+ <8 x i32> zeroinitializer
+ %x = fadd <8 x double> %c, %i
+ %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
+ ret <8 x double> %r
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll?rev=224516&r1=224515&r2=224516&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-arith.ll Thu Dec 18 06:28:22 2014
@@ -149,6 +149,258 @@ define <8 x i32> @vpmulld256_test(<8 x i
ret <8 x i32> %x
}
+; CHECK-LABEL: test_vaddpd_256
+; CHECK: vaddpd{{.*}}
+; CHECK: ret
+define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
+entry:
+ %add.i = fadd <4 x double> %x, %y
+ ret <4 x double> %add.i
+}
+
+; CHECK-LABEL: test_fold_vaddpd_256
+; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
+; CHECK: ret
+define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
+entry:
+ %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
+ ret <4 x double> %add.i
+}
+
+; CHECK-LABEL: test_broadcast_vaddpd_256
+; CHECK: LCP{{.*}}(%rip){1to8}, %ymm0, %ymm0
+; CHECK: ret
+define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
+ %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+ ret <8 x float> %b
+}
+
+; CHECK-LABEL: test_mask_vaddps_256
+; CHECK: vaddps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i,
+ <8 x float> %j, <8 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+ %x = fadd <8 x float> %i, %j
+ %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
+ ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulps_256
+; CHECK: vmulps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i,
+ <8 x float> %j, <8 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+ %x = fmul <8 x float> %i, %j
+ %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
+ ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vminps_256
+; CHECK: vminps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i,
+ <8 x float> %j, <8 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+ %cmp_res = fcmp olt <8 x float> %i, %j
+ %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
+ %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
+ ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxps_256
+; CHECK: vmaxps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i,
+ <8 x float> %j, <8 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+ %cmp_res = fcmp ogt <8 x float> %i, %j
+ %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
+ %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
+ ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vsubps_256
+; CHECK: vsubps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i,
+ <8 x float> %j, <8 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+ %x = fsub <8 x float> %i, %j
+ %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
+ ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vdivps_256
+; CHECK: vdivps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i,
+ <8 x float> %j, <8 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+ %x = fdiv <8 x float> %i, %j
+ %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
+ ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulpd_256
+; CHECK: vmulpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i,
+ <4 x double> %j, <4 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %x = fmul <4 x double> %i, %j
+ %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vminpd_256
+; CHECK: vminpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i,
+ <4 x double> %j, <4 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %cmp_res = fcmp olt <4 x double> %i, %j
+ %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
+ %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxpd_256
+; CHECK: vmaxpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i,
+ <4 x double> %j, <4 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %cmp_res = fcmp ogt <4 x double> %i, %j
+ %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
+ %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vsubpd_256
+; CHECK: vsubpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i,
+ <4 x double> %j, <4 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %x = fsub <4 x double> %i, %j
+ %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vdivpd_256
+; CHECK: vdivpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i,
+ <4 x double> %j, <4 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %x = fdiv <4 x double> %i, %j
+ %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vaddpd_256
+; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i,
+ <4 x double> %j, <4 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %x = fadd <4 x double> %i, %j
+ %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_vaddpd_256
+; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}}}
+; CHECK: ret
+define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j,
+ <4 x i64> %mask1) nounwind readnone {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %x = fadd <4 x double> %i, %j
+ %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_fold_vaddpd_256
+; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}.*}}
+; CHECK: ret
+define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i,
+ <4 x double>* %j, <4 x i64> %mask1)
+ nounwind {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %tmp = load <4 x double>* %j
+ %x = fadd <4 x double> %i, %tmp
+ %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_fold_vaddpd_256
+; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}.*}}
+; CHECK: ret
+define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
+ <4 x i64> %mask1) nounwind {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %tmp = load <4 x double>* %j
+ %x = fadd <4 x double> %i, %tmp
+ %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_broadcast2_vaddpd_256
+; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
+; CHECK: ret
+define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
+ %tmp = load double* %j
+ %b = insertelement <4 x double> undef, double %tmp, i32 0
+ %c = shufflevector <4 x double> %b, <4 x double> undef,
+ <4 x i32> zeroinitializer
+ %x = fadd <4 x double> %c, %i
+ ret <4 x double> %x
+}
+
+; CHECK-LABEL: test_mask_broadcast_vaddpd_256
+; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]}.*}}
+; CHECK: ret
+define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
+ double* %j, <4 x i64> %mask1) nounwind {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %tmp = load double* %j
+ %b = insertelement <4 x double> undef, double %tmp, i32 0
+ %c = shufflevector <4 x double> %b, <4 x double> undef,
+ <4 x i32> zeroinitializer
+ %x = fadd <4 x double> %c, %i
+ %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
+ ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_broadcast_vaddpd_256
+; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
+ <4 x i64> %mask1) nounwind {
+ %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+ %tmp = load double* %j
+ %b = insertelement <4 x double> undef, double %tmp, i32 0
+ %c = shufflevector <4 x double> %b, <4 x double> undef,
+ <4 x i32> zeroinitializer
+ %x = fadd <4 x double> %c, %i
+ %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
+ ret <4 x double> %r
+}
+
; 128-bit
; CHECK-LABEL: vpaddq128_test
@@ -289,3 +541,254 @@ define <4 x i32> @vpmulld128_test(<4 x i
%x = mul <4 x i32> %i, %j
ret <4 x i32> %x
}
+
+; CHECK-LABEL: test_vaddpd_128
+; CHECK: vaddpd{{.*}}
+; CHECK: ret
+define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
+entry:
+ %add.i = fadd <2 x double> %x, %y
+ ret <2 x double> %add.i
+}
+
+; CHECK-LABEL: test_fold_vaddpd_128
+; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
+; CHECK: ret
+define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
+entry:
+ %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
+ ret <2 x double> %add.i
+}
+
+; CHECK-LABEL: test_broadcast_vaddpd_128
+; CHECK: LCP{{.*}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK: ret
+define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
+ %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+ ret <4 x float> %b
+}
+
+; CHECK-LABEL: test_mask_vaddps_128
+; CHECK: vaddps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i,
+ <4 x float> %j, <4 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+ %x = fadd <4 x float> %i, %j
+ %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
+ ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulps_128
+; CHECK: vmulps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i,
+ <4 x float> %j, <4 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+ %x = fmul <4 x float> %i, %j
+ %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
+ ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vminps_128
+; CHECK: vminps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i,
+ <4 x float> %j, <4 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+ %cmp_res = fcmp olt <4 x float> %i, %j
+ %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
+ %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
+ ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxps_128
+; CHECK: vmaxps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i,
+ <4 x float> %j, <4 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+ %cmp_res = fcmp ogt <4 x float> %i, %j
+ %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
+ %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
+ ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vsubps_128
+; CHECK: vsubps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i,
+ <4 x float> %j, <4 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+ %x = fsub <4 x float> %i, %j
+ %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
+ ret <4 x float> %r
+}
+
+
+; CHECK-LABEL: test_mask_vdivps_128
+; CHECK: vdivps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i,
+ <4 x float> %j, <4 x i32> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+ %x = fdiv <4 x float> %i, %j
+ %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
+ ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulpd_128
+; CHECK: vmulpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i,
+ <2 x double> %j, <2 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %x = fmul <2 x double> %i, %j
+ %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vminpd_128
+; CHECK: vminpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i,
+ <2 x double> %j, <2 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %cmp_res = fcmp olt <2 x double> %i, %j
+ %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
+ %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxpd_128
+; CHECK: vmaxpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i,
+ <2 x double> %j, <2 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %cmp_res = fcmp ogt <2 x double> %i, %j
+ %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
+ %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vsubpd_128
+; CHECK: vsubpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i,
+ <2 x double> %j, <2 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %x = fsub <2 x double> %i, %j
+ %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vdivpd_128
+; CHECK: vdivpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i,
+ <2 x double> %j, <2 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %x = fdiv <2 x double> %i, %j
+ %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vaddpd_128
+; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i,
+ <2 x double> %j, <2 x i64> %mask1)
+ nounwind readnone {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %x = fadd <2 x double> %i, %j
+ %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_vaddpd_128
+; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}}}
+; CHECK: ret
+define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
+ <2 x i64> %mask1) nounwind readnone {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %x = fadd <2 x double> %i, %j
+ %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_fold_vaddpd_128
+; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}.*}}
+; CHECK: ret
+define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i,
+ <2 x double>* %j, <2 x i64> %mask1)
+ nounwind {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %tmp = load <2 x double>* %j
+ %x = fadd <2 x double> %i, %tmp
+ %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_fold_vaddpd_128
+; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}.*}}
+; CHECK: ret
+define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
+ <2 x i64> %mask1) nounwind {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %tmp = load <2 x double>* %j
+ %x = fadd <2 x double> %i, %tmp
+ %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_broadcast2_vaddpd_128
+; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
+; CHECK: ret
+define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
+ %tmp = load double* %j
+ %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
+ %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
+ %x = fadd <2 x double> %j.1, %i
+ ret <2 x double> %x
+}
+
+; CHECK-LABEL: test_mask_broadcast_vaddpd_128
+; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]}.*}}
+; CHECK: ret
+define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i,
+ double* %j, <2 x i64> %mask1)
+ nounwind {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %tmp = load double* %j
+ %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
+ %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
+ %x = fadd <2 x double> %j.1, %i
+ %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
+ ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_broadcast_vaddpd_128
+; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
+ <2 x i64> %mask1) nounwind {
+ %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+ %tmp = load double* %j
+ %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
+ %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
+ %x = fadd <2 x double> %j.1, %i
+ %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
+ ret <2 x double> %r
+}
More information about the llvm-commits
mailing list