[llvm] r294151 - [AVX-512] Add masked scalar FMA intrinsics to isNonFoldablePartialRegisterLoad to improve load folding of scalar loads.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 5 14:25:40 PST 2017
Author: ctopper
Date: Sun Feb 5 16:25:40 2017
New Revision: 294151
URL: http://llvm.org/viewvc/llvm-project?rev=294151&view=rev
Log:
[AVX-512] Add masked scalar FMA intrinsics to isNonFoldablePartialRegisterLoad to improve load folding of scalar loads.
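For context: isNonFoldablePartialRegisterLoad is the gate the memory-operand
folder consults before collapsing a separate scalar load plus a
register-register instruction into one instruction with a memory operand.
Below is a paraphrased, self-contained sketch of the idea only -- not the
actual LLVM source (the real function lives in X86InstrInfo.cpp, takes
MachineInstrs, and switches over the load opcode first):

// Sketch of the gating logic, simplified and standalone. A 32-bit vmovss
// load defines only element 0 of an XMM register (zeroing the rest), so
// folding it into a user that reads all 128 bits would change semantics.
// Scalar intrinsic forms (*_Int, and with this patch *_Intk / *_Intkz)
// read only element 0, so the fold is safe for them.
enum Opcode {
  VFMADD213SSZr_Int,   // unmasked scalar FMA: previously whitelisted
  VFMADD213SSZr_Intk,  // merge-masked form: added by this commit
  VFMADD213SSZr_Intkz, // zero-masked form: added by this commit
  ADDPSrr              // packed op: reads all four lanes
};

static bool isNonFoldablePartialRegisterLoad(Opcode UserOpc) {
  switch (UserOpc) {
  case VFMADD213SSZr_Int:
  case VFMADD213SSZr_Intk:
  case VFMADD213SSZr_Intkz:
    return false; // only element 0 is read -> safe to fold the scalar load
  default:
    return true;  // conservatively refuse the fold
  }
}

The masked and zero-masked FMA forms read element 0 exactly like the unmasked
ones, which is all this patch records in the whitelist.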
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=294151&r1=294150&r2=294151&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Feb 5 16:25:40 2017
@@ -7740,6 +7740,18 @@ static bool isNonFoldablePartialRegister
case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
+ case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk:
+ case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk:
+ case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk:
+ case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk:
+ case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk:
+ case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk:
+ case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz:
+ case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz:
+ case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz:
+ case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
+ case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
+ case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
return false;
default:
return true;
@@ -7773,6 +7785,18 @@ static bool isNonFoldablePartialRegister
case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
+ case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk:
+ case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk:
+ case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk:
+ case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk:
+ case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk:
+ case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk:
+ case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz:
+ case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz:
+ case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz:
+ case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
+ case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
+ case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
return false;
default:
return true;
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=294151&r1=294150&r2=294151&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sun Feb 5 16:25:40 2017
@@ -4843,10 +4843,9 @@ define void @fmadd_ss_mask_memfold(float
; CHECK-LABEL: fmadd_ss_mask_memfold:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: andl $1, %edx
; CHECK-NEXT: kmovw %edx, %k1
-; CHECK-NEXT: vfmadd213ss %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1}
; CHECK-NEXT: vmovss %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load float, float* %a
@@ -4872,10 +4871,9 @@ define void @fmadd_ss_maskz_memfold(floa
; CHECK-LABEL: fmadd_ss_maskz_memfold:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: andl $1, %edx
; CHECK-NEXT: kmovw %edx, %k1
-; CHECK-NEXT: vfmadd213ss %xmm0, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovss %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load float, float* %a
@@ -4901,10 +4899,9 @@ define void @fmadd_sd_mask_memfold(doubl
; CHECK-LABEL: fmadd_sd_mask_memfold:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: andl $1, %edx
; CHECK-NEXT: kmovw %edx, %k1
-; CHECK-NEXT: vfmadd213sd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1}
; CHECK-NEXT: vmovlps %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load double, double* %a
@@ -4926,10 +4923,9 @@ define void @fmadd_sd_maskz_memfold(doub
; CHECK-LABEL: fmadd_sd_maskz_memfold:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: andl $1, %edx
; CHECK-NEXT: kmovw %edx, %k1
-; CHECK-NEXT: vfmadd213sd %xmm0, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: vfmadd132sd (%rsi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vmovlps %xmm0, (%rdi)
; CHECK-NEXT: retq
%a.val = load double, double* %a
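The test updates show the payoff: the standalone vmovss/vmovsd of the second
operand disappears, and the masked FMA takes the load directly as a memory
operand (e.g. vfmadd132ss (%rsi), %xmm0, %xmm0 {%k1}). A hypothetical
C-level equivalent of the pattern these tests exercise -- the function name
is made up, but the intrinsics are the real AVX-512 ones (compile with
-mavx512f):

#include <immintrin.h>

// Hypothetical example, not from the commit: a masked scalar FMA whose
// second operand comes from memory. With this patch the 32-bit load of *p
// can fold into the FMA as a memory operand instead of needing its own
// vmovss.
__m128 fmadd_ss_from_mem(__m128 a, __m128 c, const float *p, __mmask8 k) {
  __m128 b = _mm_load_ss(p);            // loads element 0, zeroes lanes 1-3
  return _mm_mask_fmadd_ss(a, k, b, c); // lane 0: a*b + c under mask k
}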