[llvm] r314080 - [X86] Add IFMA instructions to the load folding tables and make them commutable for the multiply operands.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 24 10:28:14 PDT 2017
Author: ctopper
Date: Sun Sep 24 10:28:14 2017
New Revision: 314080
URL: http://llvm.org/viewvc/llvm-project?rev=314080&view=rev
Log:
[X86] Add IFMA instructions to the load folding tables and make them commutable for the multiply operands.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=314080&r1=314079&r2=314080&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Sep 24 10:28:14 2017
@@ -6484,7 +6484,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=314080&r1=314079&r2=314080&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Sep 24 10:28:14 2017
@@ -2528,6 +2528,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0 },
{ X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0 },
{ X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 },
+ { X86::VPMADD52HUQZr, X86::VPMADD52HUQZm, 0 },
+ { X86::VPMADD52LUQZr, X86::VPMADD52LUQZm, 0 },
{ X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 },
{ X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 },
@@ -2544,6 +2546,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0 },
{ X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0 },
{ X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 },
+ { X86::VPMADD52HUQZ256r, X86::VPMADD52HUQZ256m, 0 },
+ { X86::VPMADD52LUQZ256r, X86::VPMADD52LUQZ256m, 0 },
{ X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 },
{ X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 },
@@ -2560,6 +2564,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0 },
{ X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0 },
{ X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 },
+ { X86::VPMADD52HUQZ128r, X86::VPMADD52HUQZ128m, 0 },
+ { X86::VPMADD52LUQZ128r, X86::VPMADD52LUQZ128m, 0 },
{ X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 },
{ X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 },
@@ -3234,6 +3240,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 },
{ X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 },
{ X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
+ { X86::VPMADD52HUQZrk, X86::VPMADD52HUQZmk, 0 },
+ { X86::VPMADD52LUQZrk, X86::VPMADD52LUQZmk, 0 },
{ X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
{ X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
{ X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 },
@@ -3376,6 +3384,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 },
{ X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 },
{ X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
+ { X86::VPMADD52HUQZ256rk, X86::VPMADD52HUQZ256mk, 0 },
+ { X86::VPMADD52LUQZ256rk, X86::VPMADD52LUQZ256mk, 0 },
{ X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
{ X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
{ X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 },
@@ -3509,6 +3519,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 },
{ X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 },
{ X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
+ { X86::VPMADD52HUQZ128rk, X86::VPMADD52HUQZ128mk, 0 },
+ { X86::VPMADD52LUQZ128rk, X86::VPMADD52LUQZ128mk, 0 },
{ X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
{ X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
{ X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 },
@@ -3597,6 +3609,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 },
{ X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 },
{ X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 },
+ { X86::VPMADD52HUQZrkz, X86::VPMADD52HUQZmkz, 0 },
+ { X86::VPMADD52LUQZrkz, X86::VPMADD52LUQZmkz, 0 },
{ X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
{ X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 },
@@ -3613,6 +3627,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 },
{ X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 },
{ X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 },
+ { X86::VPMADD52HUQZ256rkz, X86::VPMADD52HUQZ256mkz, 0 },
+ { X86::VPMADD52LUQZ256rkz, X86::VPMADD52LUQZ256mkz, 0 },
{ X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 },
{ X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 },
@@ -3629,6 +3645,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 },
{ X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 },
{ X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 },
+ { X86::VPMADD52HUQZ128rkz, X86::VPMADD52HUQZ128mkz, 0 },
+ { X86::VPMADD52LUQZ128rkz, X86::VPMADD52LUQZ128mkz, 0 },
{ X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 },
{ X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 },
};
@@ -5631,6 +5649,41 @@ bool X86InstrInfo::findCommutedOpIndices
case X86::VPTERNLOGQZ256rmbikz:
case X86::VPTERNLOGQZrmbikz:
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ case X86::VPMADD52HUQZ128r:
+ case X86::VPMADD52HUQZ128rk:
+ case X86::VPMADD52HUQZ128rkz:
+ case X86::VPMADD52HUQZ256r:
+ case X86::VPMADD52HUQZ256rk:
+ case X86::VPMADD52HUQZ256rkz:
+ case X86::VPMADD52HUQZr:
+ case X86::VPMADD52HUQZrk:
+ case X86::VPMADD52HUQZrkz:
+ case X86::VPMADD52LUQZ128r:
+ case X86::VPMADD52LUQZ128rk:
+ case X86::VPMADD52LUQZ128rkz:
+ case X86::VPMADD52LUQZ256r:
+ case X86::VPMADD52LUQZ256rk:
+ case X86::VPMADD52LUQZ256rkz:
+ case X86::VPMADD52LUQZr:
+ case X86::VPMADD52LUQZrk:
+ case X86::VPMADD52LUQZrkz: {
+ unsigned CommutableOpIdx1 = 2;
+ unsigned CommutableOpIdx2 = 3;
+ if (Desc.TSFlags & X86II::EVEX_K) {
+ // Skip the mask register.
+ ++CommutableOpIdx1;
+ ++CommutableOpIdx2;
+ }
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
+ CommutableOpIdx1, CommutableOpIdx2))
+ return false;
+ if (!MI.getOperand(SrcOpIdx1).isReg() ||
+ !MI.getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+ }
+
default:
const X86InstrFMA3Group *FMA3Group =
X86InstrFMA3Info::getFMA3Group(MI.getOpcode());
Modified: llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll?rev=314080&r1=314079&r2=314080&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll Sun Sep 24 10:28:14 2017
@@ -112,3 +112,73 @@ define <8 x i64>@test_int_x86_avx512_mas
%res6 = add <8 x i64> %res5, %res4
ret <8 x i64> %res6
}
+
+define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr) {
+; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0
+; CHECK-NEXT: retq
+
+ %x2 = load <8 x i64>, <8 x i64>* %x2ptr
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2) {
+; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0
+; CHECK-NEXT: retq
+
+ %x1 = load <8 x i64>, <8 x i64>* %x1ptr
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+
+ %x2 = load <8 x i64>, <8 x i64>* %x2ptr
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+
+ %x1 = load <8 x i64>, <8 x i64>* %x1ptr
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+
+ %x2 = load <8 x i64>, <8 x i64>* %x2ptr
+ %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+
+ %x1 = load <8 x i64>, <8 x i64>* %x1ptr
+ %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
More information about the llvm-commits mailing list