[llvm] r294153 - [AVX-512] Add scalar masked max/min intrinsic instructions to the load folding tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 5 14:25:46 PST 2017
Author: ctopper
Date: Sun Feb 5 16:25:46 2017
New Revision: 294153
URL: http://llvm.org/viewvc/llvm-project?rev=294153&view=rev
Log:
[AVX-512] Add scalar masked max/min intrinsic instructions to the load folding tables.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=294153&r1=294152&r2=294153&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Feb 5 16:25:46 2017
@@ -2339,10 +2339,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
{ X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
{ X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
+ { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 },
+ { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 },
{ X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
{ X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
{ X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
{ X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
+ { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 },
+ { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 },
{ X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
{ X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
{ X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE },
@@ -2674,10 +2678,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
{ X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
{ X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
+ { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, 0 },
+ { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, 0 },
{ X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
{ X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
{ X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
{ X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
+ { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, 0 },
+ { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, 0 },
{ X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
{ X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
{ X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE },
@@ -7744,6 +7752,8 @@ static bool isNonFoldablePartialRegister
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
+ case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
+ case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
@@ -7793,6 +7803,8 @@ static bool isNonFoldablePartialRegister
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
+ case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
+ case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=294153&r1=294152&r2=294153&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sun Feb 5 16:25:46 2017
@@ -2510,6 +2510,39 @@ define <4 x float> @test_max_ss(<4 x flo
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
ret <4 x float> %res
}
+
+define <4 x float> @test_mask_max_ss_memfold(<4 x float> %a0, float* %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_max_ss_memfold:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmaxss (%rdi), %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %a1.val = load float, float* %a1
+ %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
+ %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
+ %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
+ %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_max_ss_memfold(<4 x float> %a0, float* %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_max_ss_memfold:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmaxss (%rdi), %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a1.val = load float, float* %a1
+ %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
+ %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
+ %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
+ %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
@@ -2576,6 +2609,35 @@ define <2 x double> @test_max_sd(<2 x do
ret <2 x double> %res
}
+define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, double* %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_max_sd_memfold:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmaxsd (%rdi), %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %a1.val = load double, double* %a1
+ %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
+ %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_max_sd_memfold(<2 x double> %a0, double* %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_max_sd_memfold:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a1.val = load double, double* %a1
+ %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
+ %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
; CHECK: ## BB#0:
More information about the llvm-commits mailing list