[llvm] r294152 - [AVX-512] Add scalar masked add/sub/mul/div intrinsic instructions to the load folding tables.
Craig Topper via llvm-commits
llvm-commits@lists.llvm.org
Sun Feb 5 14:25:43 PST 2017
Author: ctopper
Date: Sun Feb 5 16:25:42 2017
New Revision: 294152
URL: http://llvm.org/viewvc/llvm-project?rev=294152&view=rev
Log:
[AVX-512] Add scalar masked add/sub/mul/div intrinsic instructions to the load folding tables.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
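For context before the diff: each entry in these folding tables is a { RegOp, MemOp, Flags } triple that maps a register-register opcode to its register-memory counterpart, letting the peephole optimizer fold a load into the instruction's memory operand. A minimal sketch of the entry shape, inferred from the triples in the diff (the struct and field names are assumptions about the surrounding code in X86InstrInfo.cpp, not quoted from it):

    // Sketch only, mirroring the { RegOp, MemOp, Flags } triples added below.
    struct X86MemoryFoldTableEntry {
      unsigned RegOp;  // reg-reg form, e.g. X86::VADDSSZrr_Intkz
      unsigned MemOp;  // reg-mem form, e.g. X86::VADDSSZrm_Intkz
      unsigned Flags;  // TB_NO_REVERSE here: the fold must not be undone,
                       // since unfolding would widen the scalar load back to
                       // a full 128-bit load and read memory the original
                       // code never touched.
    };

The new scalar entries all carry TB_NO_REVERSE because the _Int memory forms load only the low element; the reverse (unfold) direction would not be equivalent.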
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=294152&r1=294151&r2=294152&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Feb 5 16:25:42 2017
@@ -2315,6 +2315,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
// AVX-512 masked instructions
{ X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
{ X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
+ { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 },
{ X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 },
{ X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
@@ -2323,6 +2325,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
{ X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
{ X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+ { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
{ X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
{ X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
@@ -2341,6 +2345,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
{ X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
{ X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
+ { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
{ X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
{ X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
@@ -2389,6 +2395,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
{ X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
{ X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
+ { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 },
{ X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 },
{ X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 },
@@ -2642,6 +2650,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
// AVX-512 foldable masked instructions
{ X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
{ X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
+ { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE },
{ X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 },
{ X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 },
{ X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 },
@@ -2650,6 +2660,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
{ X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
{ X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+ { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
{ X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
{ X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
{ X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
@@ -2668,6 +2680,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
{ X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
{ X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
+ { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE },
{ X86::VORPDZrrk, X86::VORPDZrmk, 0 },
{ X86::VORPSZrrk, X86::VORPSZrmk, 0 },
{ X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
@@ -2729,6 +2743,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPXORQZrrk, X86::VPXORQZrmk, 0 },
{ X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
{ X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
+ { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE },
{ X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 },
{ X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 },
{ X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 },
@@ -7726,6 +7742,10 @@ static bool isNonFoldablePartialRegister
case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
+ case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
+ case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
+ case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
+ case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int:
@@ -7771,6 +7791,10 @@ static bool isNonFoldablePartialRegister
case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
+ case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
+ case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
+ case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
+ case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int:
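The isNonFoldablePartialRegisterLoad additions above extend its list of scalar users. A MOVSS/MOVSD-style load writes only 4 or 8 bytes of a 16-byte XMM register, so folding such a load is normally rejected; the listed _Intk/_Intkz forms read only the low element, so the fold remains safe for them. A condensed sketch of the decision these switch cases feed into (a simplification under assumed sizes, not the actual function body):

    // Returns true when folding the load into the user would be unsafe.
    static bool isNonFoldableSketch(unsigned LoadBytes, unsigned RegBytes,
                                    bool UserReadsOnlyLowElement) {
      if (LoadBytes < RegBytes)          // e.g. 4-byte MOVSS load, 16-byte reg
        return !UserReadsOnlyLowElement; // fold only for scalar _Int users
      return false;                      // full-width load: no hazard
    }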
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=294152&r1=294151&r2=294152&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sun Feb 5 16:25:42 2017
@@ -2301,6 +2301,39 @@ define <4 x float> @test_add_ss_rn(<4 x
ret <4 x float> %res
}
+define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_ss_current_memfold:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %a1.val = load float, float* %a1
+ %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
+ %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
+ %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
+ %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, float* %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_add_ss_current_memfold:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vaddss (%rdi), %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a1.val = load float, float* %a1
+ %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
+ %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
+ %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
+ %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
@@ -2383,6 +2416,35 @@ define <2 x double> @test_add_sd_rn(<2 x
ret <2 x double> %res
}
+define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, double* %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_sd_current_memfold:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vaddsd (%rdi), %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %a1.val = load double, double* %a1
+ %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
+ %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_add_sd_current_memfold(<2 x double> %a0, double* %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_add_sd_current_memfold:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vaddsd (%rdi), %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a1.val = load double, double* %a1
+ %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
+ %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {