[llvm] r334728 - [X86] Add more vector instructions to the memory folding table using the autogenerated table as a guide.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 14 08:40:31 PDT 2018


Author: ctopper
Date: Thu Jun 14 08:40:31 2018
New Revision: 334728

URL: http://llvm.org/viewvc/llvm-project?rev=334728&view=rev
Log:
[X86] Add more vector instructions to the memory folding table using the autogenerated table as a guide.

The test change is because we now fold the stack reload into RNDSCALE, and RNDSCALE can then be turned into ROUND by the EVEX->VEX pass.
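
For context, each entry in these tables pairs a register-form opcode with its load-folded counterpart plus flags such as TB_ALIGN_16 or TB_NO_REVERSE, and the X86InstrInfo constructor walks the tables to build a lookup map (see the MemoryFoldTable2 loop in the diff below). The following is only a simplified, self-contained sketch of that idea; the enum values, flag bits, and helper names here are invented for illustration and are not the actual LLVM definitions.

// Hypothetical sketch of a register->memory folding table. Names below
// (Opcode values, FoldFlags, buildFoldTable, lookupFold) are invented for
// illustration; they are not LLVM's real identifiers.
#include <cstdint>
#include <optional>
#include <unordered_map>
#include <utility>

enum Opcode : unsigned { COMISDrr, COMISDrm, VRNDSCALESSr, VRNDSCALESSm };

// Mirrors the idea of TB_ALIGN_16 / TB_NO_REVERSE in the real tables:
// required memory-operand alignment, and whether unfolding the load back
// into the register form is forbidden.
enum FoldFlags : uint16_t {
  FF_NONE       = 0,
  FF_ALIGN_16   = 1 << 0,  // memory operand must be 16-byte aligned
  FF_NO_REVERSE = 1 << 1,  // cannot unfold back to the register form
};

struct FoldEntry {
  Opcode RegOp;     // register-register form
  Opcode MemOp;     // load-folded form
  uint16_t Flags;
};

// Static table, analogous in spirit to MemoryFoldTable1/2/3.
static const FoldEntry FoldTable[] = {
    {COMISDrr,     COMISDrm,     FF_NONE},
    {VRNDSCALESSr, VRNDSCALESSm, FF_NONE},
};

// Build a reg-op -> (mem-op, flags) map, like the constructor loop that
// walks each fold table in the patch.
static std::unordered_map<unsigned, std::pair<unsigned, uint16_t>>
buildFoldTable() {
  std::unordered_map<unsigned, std::pair<unsigned, uint16_t>> Map;
  for (const FoldEntry &E : FoldTable)
    Map.emplace(E.RegOp, std::make_pair(unsigned(E.MemOp), E.Flags));
  return Map;
}

// Query used when a spill reload feeds an instruction: if the register op
// has a memory form, the load can be folded into it instead of being a
// separate reload.
static std::optional<std::pair<unsigned, uint16_t>>
lookupFold(const std::unordered_map<unsigned, std::pair<unsigned, uint16_t>> &Map,
           unsigned RegOp) {
  auto It = Map.find(RegOp);
  if (It == Map.end())
    return std::nullopt;
  return It->second;
}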

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=334728&r1=334727&r2=334728&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Jun 14 08:40:31 2018
@@ -563,7 +563,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::CMP32rr,         X86::CMP32rm,             0 },
     { X86::CMP64rr,         X86::CMP64rm,             0 },
     { X86::CMP8rr,          X86::CMP8rm,              0 },
+    { X86::COMISDrr,        X86::COMISDrm,            0 },
     { X86::COMISDrr_Int,    X86::COMISDrm_Int,        TB_NO_REVERSE },
+    { X86::COMISSrr,        X86::COMISSrm,            0 },
     { X86::COMISSrr_Int,    X86::COMISSrm_Int,        TB_NO_REVERSE },
     { X86::CVTDQ2PDrr,      X86::CVTDQ2PDrm,          TB_NO_REVERSE },
     { X86::CVTDQ2PSrr,      X86::CVTDQ2PSrm,          TB_ALIGN_16 },
@@ -692,7 +694,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::PSWAPDrr,        X86::PSWAPDrm,            0 },
 
     // AVX 128-bit versions of foldable instructions
+    { X86::VCOMISDrr,       X86::VCOMISDrm,           0 },
     { X86::VCOMISDrr_Int,   X86::VCOMISDrm_Int,       TB_NO_REVERSE },
+    { X86::VCOMISSrr,       X86::VCOMISSrm,           0 },
     { X86::VCOMISSrr_Int,   X86::VCOMISSrm_Int,       TB_NO_REVERSE },
     { X86::VCVTTSD2SI64rr,  X86::VCVTTSD2SI64rm,      0 },
     { X86::VCVTTSD2SI64rr_Int,X86::VCVTTSD2SI64rm_Int,TB_NO_REVERSE },
@@ -933,6 +937,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     // AVX-512 foldable instructions
     { X86::VBROADCASTSSZr,   X86::VBROADCASTSSZm,     TB_NO_REVERSE },
     { X86::VBROADCASTSDZr,   X86::VBROADCASTSDZm,     TB_NO_REVERSE },
+    { X86::VCOMISDZrr,       X86::VCOMISDZrm,         0 },
+    { X86::VCOMISDZrr_Int,   X86::VCOMISDZrm_Int,     TB_NO_REVERSE },
+    { X86::VCOMISSZrr,       X86::VCOMISSZrm,         0 },
+    { X86::VCOMISSZrr_Int,   X86::VCOMISSZrm_Int,     TB_NO_REVERSE },
     { X86::VCVTDQ2PDZrr,     X86::VCVTDQ2PDZrm,       0 },
     { X86::VCVTPD2PSZrr,     X86::VCVTPD2PSZrm,       0 },
     { X86::VCVTUDQ2PDZrr,    X86::VCVTUDQ2PDZrm,      0 },
@@ -942,12 +950,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VMOVDI2SSZrr,     X86::VMOVDI2SSZrm,       0 },
     { X86::VMOVAPDZrr,       X86::VMOVAPDZrm,         TB_ALIGN_64 },
     { X86::VMOVAPSZrr,       X86::VMOVAPSZrm,         TB_ALIGN_64 },
+    { X86::VMOVDDUPZrr,      X86::VMOVDDUPZrm,        0 },
     { X86::VMOVDQA32Zrr,     X86::VMOVDQA32Zrm,       TB_ALIGN_64 },
     { X86::VMOVDQA64Zrr,     X86::VMOVDQA64Zrm,       TB_ALIGN_64 },
     { X86::VMOVDQU8Zrr,      X86::VMOVDQU8Zrm,        0 },
     { X86::VMOVDQU16Zrr,     X86::VMOVDQU16Zrm,       0 },
     { X86::VMOVDQU32Zrr,     X86::VMOVDQU32Zrm,       0 },
     { X86::VMOVDQU64Zrr,     X86::VMOVDQU64Zrm,       0 },
+    { X86::VMOVSHDUPZrr,     X86::VMOVSHDUPZrm,       0 },
+    { X86::VMOVSLDUPZrr,     X86::VMOVSLDUPZrm,       0 },
     { X86::VMOVUPDZrr,       X86::VMOVUPDZrm,         0 },
     { X86::VMOVUPSZrr,       X86::VMOVUPSZrm,         0 },
     { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm,      TB_NO_REVERSE },
@@ -955,6 +966,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPABSDZrr,        X86::VPABSDZrm,          0 },
     { X86::VPABSQZrr,        X86::VPABSQZrm,          0 },
     { X86::VPABSWZrr,        X86::VPABSWZrm,          0 },
+    { X86::VPBROADCASTBZr,   X86::VPBROADCASTBZm,     TB_NO_REVERSE },
+    { X86::VPBROADCASTDZr,   X86::VPBROADCASTDZm,     TB_NO_REVERSE },
+    { X86::VPBROADCASTQZr,   X86::VPBROADCASTQZm,     TB_NO_REVERSE },
+    { X86::VPBROADCASTWZr,   X86::VPBROADCASTWZm,     TB_NO_REVERSE },
     { X86::VPCONFLICTDZrr,   X86::VPCONFLICTDZrm,     0 },
     { X86::VPCONFLICTQZrr,   X86::VPCONFLICTQZrm,     0 },
     { X86::VPERMILPDZri,     X86::VPERMILPDZmi,       0 },
@@ -993,6 +1008,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPSRLDZri,        X86::VPSRLDZmi,          0 },
     { X86::VPSRLQZri,        X86::VPSRLQZmi,          0 },
     { X86::VPSRLWZri,        X86::VPSRLWZmi,          0 },
+    { X86::VRCP14PDZr,       X86::VRCP14PDZm,         0 },
+    { X86::VRCP14PSZr,       X86::VRCP14PSZm,         0 },
+    { X86::VRCP28PDr,        X86::VRCP28PDm,          0 },
+    { X86::VRCP28PSr,        X86::VRCP28PSm,          0 },
+    { X86::VRSQRT14PDZr,     X86::VRSQRT14PDZm,       0 },
+    { X86::VRSQRT14PSZr,     X86::VRSQRT14PSZm,       0 },
+    { X86::VRSQRT28PDr,      X86::VRSQRT28PDm,        0 },
+    { X86::VRSQRT28PSr,      X86::VRSQRT28PSm,        0 },
+    { X86::VSQRTPDZr,        X86::VSQRTPDZm,          0 },
+    { X86::VSQRTPSZr,        X86::VSQRTPSZm,          0 },
+    { X86::VUCOMISDZrr,      X86::VUCOMISDZrm,        0 },
+    { X86::VUCOMISDZrr_Int,  X86::VUCOMISDZrm_Int,    TB_NO_REVERSE },
+    { X86::VUCOMISSZrr,      X86::VUCOMISSZrm,        0 },
+    { X86::VUCOMISSZrr_Int,  X86::VUCOMISSZrm_Int,    TB_NO_REVERSE },
 
     // AVX-512 foldable instructions (256-bit versions)
     { X86::VBROADCASTSSZ256r,    X86::VBROADCASTSSZ256m,    TB_NO_REVERSE },
@@ -1002,18 +1031,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VCVTUDQ2PDZ256rr,     X86::VCVTUDQ2PDZ256rm,     0 },
     { X86::VMOVAPDZ256rr,        X86::VMOVAPDZ256rm,        TB_ALIGN_32 },
     { X86::VMOVAPSZ256rr,        X86::VMOVAPSZ256rm,        TB_ALIGN_32 },
+    { X86::VMOVDDUPZ256rr,       X86::VMOVDDUPZ256rm,       0 },
     { X86::VMOVDQA32Z256rr,      X86::VMOVDQA32Z256rm,      TB_ALIGN_32 },
     { X86::VMOVDQA64Z256rr,      X86::VMOVDQA64Z256rm,      TB_ALIGN_32 },
     { X86::VMOVDQU8Z256rr,       X86::VMOVDQU8Z256rm,       0 },
     { X86::VMOVDQU16Z256rr,      X86::VMOVDQU16Z256rm,      0 },
     { X86::VMOVDQU32Z256rr,      X86::VMOVDQU32Z256rm,      0 },
     { X86::VMOVDQU64Z256rr,      X86::VMOVDQU64Z256rm,      0 },
+    { X86::VMOVSHDUPZ256rr,      X86::VMOVSHDUPZ256rm,      0 },
+    { X86::VMOVSLDUPZ256rr,      X86::VMOVSLDUPZ256rm,      0 },
     { X86::VMOVUPDZ256rr,        X86::VMOVUPDZ256rm,        0 },
     { X86::VMOVUPSZ256rr,        X86::VMOVUPSZ256rm,        0 },
     { X86::VPABSBZ256rr,         X86::VPABSBZ256rm,         0 },
     { X86::VPABSDZ256rr,         X86::VPABSDZ256rm,         0 },
     { X86::VPABSQZ256rr,         X86::VPABSQZ256rm,         0 },
     { X86::VPABSWZ256rr,         X86::VPABSWZ256rm,         0 },
+    { X86::VPBROADCASTBZ256r,    X86::VPBROADCASTBZ256m,    TB_NO_REVERSE },
+    { X86::VPBROADCASTDZ256r,    X86::VPBROADCASTDZ256m,    TB_NO_REVERSE },
+    { X86::VPBROADCASTQZ256r,    X86::VPBROADCASTQZ256m,    TB_NO_REVERSE },
+    { X86::VPBROADCASTWZ256r,    X86::VPBROADCASTWZ256m,    TB_NO_REVERSE },
     { X86::VPCONFLICTDZ256rr,    X86::VPCONFLICTDZ256rm,    0 },
     { X86::VPCONFLICTQZ256rr,    X86::VPCONFLICTQZ256rm,    0 },
     { X86::VPERMILPDZ256ri,      X86::VPERMILPDZ256mi,      0 },
@@ -1052,6 +1088,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPSRLDZ256ri,         X86::VPSRLDZ256mi,         0 },
     { X86::VPSRLQZ256ri,         X86::VPSRLQZ256mi,         0 },
     { X86::VPSRLWZ256ri,         X86::VPSRLWZ256mi,         0 },
+    { X86::VRCP14PDZ256r,        X86::VRCP14PDZ256m,        0 },
+    { X86::VRCP14PSZ256r,        X86::VRCP14PSZ256m,        0 },
+    { X86::VRSQRT14PDZ256r,      X86::VRSQRT14PDZ256m,      0 },
+    { X86::VRSQRT14PSZ256r,      X86::VRSQRT14PSZ256m,      0 },
+    { X86::VSQRTPDZ256r,         X86::VSQRTPDZ256m,         0 },
+    { X86::VSQRTPSZ256r,         X86::VSQRTPSZ256m,         0 },
 
     // AVX-512 foldable instructions (128-bit versions)
     { X86::VBROADCASTSSZ128r,    X86::VBROADCASTSSZ128m,    TB_NO_REVERSE },
@@ -1060,18 +1102,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VCVTUDQ2PDZ128rr,     X86::VCVTUDQ2PDZ128rm,     TB_NO_REVERSE },
     { X86::VMOVAPDZ128rr,        X86::VMOVAPDZ128rm,        TB_ALIGN_16 },
     { X86::VMOVAPSZ128rr,        X86::VMOVAPSZ128rm,        TB_ALIGN_16 },
+    { X86::VMOVDDUPZ128rr,       X86::VMOVDDUPZ128rm,       0 },
     { X86::VMOVDQA32Z128rr,      X86::VMOVDQA32Z128rm,      TB_ALIGN_16 },
     { X86::VMOVDQA64Z128rr,      X86::VMOVDQA64Z128rm,      TB_ALIGN_16 },
     { X86::VMOVDQU8Z128rr,       X86::VMOVDQU8Z128rm,       0 },
     { X86::VMOVDQU16Z128rr,      X86::VMOVDQU16Z128rm,      0 },
     { X86::VMOVDQU32Z128rr,      X86::VMOVDQU32Z128rm,      0 },
     { X86::VMOVDQU64Z128rr,      X86::VMOVDQU64Z128rm,      0 },
+    { X86::VMOVSHDUPZ128rr,      X86::VMOVSHDUPZ128rm,      0 },
+    { X86::VMOVSLDUPZ128rr,      X86::VMOVSLDUPZ128rm,      0 },
     { X86::VMOVUPDZ128rr,        X86::VMOVUPDZ128rm,        0 },
     { X86::VMOVUPSZ128rr,        X86::VMOVUPSZ128rm,        0 },
     { X86::VPABSBZ128rr,         X86::VPABSBZ128rm,         0 },
     { X86::VPABSDZ128rr,         X86::VPABSDZ128rm,         0 },
     { X86::VPABSQZ128rr,         X86::VPABSQZ128rm,         0 },
     { X86::VPABSWZ128rr,         X86::VPABSWZ128rm,         0 },
+    { X86::VPBROADCASTBZ128r,    X86::VPBROADCASTBZ128m,    TB_NO_REVERSE },
+    { X86::VPBROADCASTDZ128r,    X86::VPBROADCASTDZ128m,    TB_NO_REVERSE },
+    { X86::VPBROADCASTQZ128r,    X86::VPBROADCASTQZ128m,    TB_NO_REVERSE },
+    { X86::VPBROADCASTWZ128r,    X86::VPBROADCASTWZ128m,    TB_NO_REVERSE },
     { X86::VPCONFLICTDZ128rr,    X86::VPCONFLICTDZ128rm,    0 },
     { X86::VPCONFLICTQZ128rr,    X86::VPCONFLICTQZ128rm,    0 },
     { X86::VPERMILPDZ128ri,      X86::VPERMILPDZ128mi,      0 },
@@ -1108,6 +1157,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPSRLDZ128ri,         X86::VPSRLDZ128mi,         0 },
     { X86::VPSRLQZ128ri,         X86::VPSRLQZ128mi,         0 },
     { X86::VPSRLWZ128ri,         X86::VPSRLWZ128mi,         0 },
+    { X86::VRCP14PDZ128r,        X86::VRCP14PDZ128m,        0 },
+    { X86::VRCP14PSZ128r,        X86::VRCP14PSZ128m,        0 },
+    { X86::VRSQRT14PDZ128r,      X86::VRSQRT14PDZ128m,      0 },
+    { X86::VRSQRT14PSZ128r,      X86::VRSQRT14PSZ128m,      0 },
+    { X86::VSQRTPDZ128r,         X86::VSQRTPDZ128m,         0 },
+    { X86::VSQRTPSZ128r,         X86::VSQRTPSZ128m,         0 },
 
     // F16C foldable instructions
     { X86::VCVTPH2PSrr,        X86::VCVTPH2PSrm,            TB_NO_REVERSE },
@@ -2063,13 +2118,28 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMINUQZrr,        X86::VPMINUQZrm,          0 },
     { X86::VPMINUWZrr,        X86::VPMINUWZrm,          0 },
     { X86::VPMULDQZrr,        X86::VPMULDQZrm,          0 },
+    { X86::VPMULHRSWZrr,      X86::VPMULHRSWZrm,        0 },
+    { X86::VPMULHUWZrr,       X86::VPMULHUWZrm,         0 },
+    { X86::VPMULHWZrr,        X86::VPMULHWZrm,          0 },
     { X86::VPMULLDZrr,        X86::VPMULLDZrm,          0 },
     { X86::VPMULLQZrr,        X86::VPMULLQZrm,          0 },
     { X86::VPMULLWZrr,        X86::VPMULLWZrm,          0 },
+    { X86::VPMULTISHIFTQBZrr, X86::VPMULTISHIFTQBZrm,   0 },
     { X86::VPMULUDQZrr,       X86::VPMULUDQZrm,         0 },
     { X86::VPORDZrr,          X86::VPORDZrm,            0 },
     { X86::VPORQZrr,          X86::VPORQZrm,            0 },
+    { X86::VPROLVDZrr,        X86::VPROLVDZrm,          0 },
+    { X86::VPROLVQZrr,        X86::VPROLVQZrm,          0 },
+    { X86::VPRORVDZrr,        X86::VPRORVDZrm,          0 },
+    { X86::VPRORVQZrr,        X86::VPRORVQZrm,          0 },
     { X86::VPSADBWZrr,        X86::VPSADBWZrm,          0 },
+    { X86::VPSHLDDZrri,       X86::VPSHLDDZrmi,         0 },
+    { X86::VPSHLDQZrri,       X86::VPSHLDQZrmi,         0 },
+    { X86::VPSHLDWZrri,       X86::VPSHLDWZrmi,         0 },
+    { X86::VPSHRDDZrri,       X86::VPSHRDDZrmi,         0 },
+    { X86::VPSHRDQZrri,       X86::VPSHRDQZrmi,         0 },
+    { X86::VPSHRDWZrri,       X86::VPSHRDWZrmi,         0 },
+    { X86::VPSHUFBITQMBZrr,   X86::VPSHUFBITQMBZrm,     0 },
     { X86::VPSHUFBZrr,        X86::VPSHUFBZrm,          0 },
     { X86::VPSLLDZrr,         X86::VPSLLDZrm,           0 },
     { X86::VPSLLQZrr,         X86::VPSLLQZrm,           0 },
@@ -2097,6 +2167,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPSUBUSBZrr,       X86::VPSUBUSBZrm,         0 },
     { X86::VPSUBUSWZrr,       X86::VPSUBUSWZrm,         0 },
     { X86::VPSUBWZrr,         X86::VPSUBWZrm,           0 },
+    { X86::VPTESTMBZrr,       X86::VPTESTMBZrm,         0 },
+    { X86::VPTESTMDZrr,       X86::VPTESTMDZrm,         0 },
+    { X86::VPTESTMQZrr,       X86::VPTESTMQZrm,         0 },
+    { X86::VPTESTMWZrr,       X86::VPTESTMWZrm,         0 },
+    { X86::VPTESTNMBZrr,      X86::VPTESTNMBZrm,        0 },
+    { X86::VPTESTNMDZrr,      X86::VPTESTNMDZrm,        0 },
+    { X86::VPTESTNMQZrr,      X86::VPTESTNMQZrm,        0 },
+    { X86::VPTESTNMWZrr,      X86::VPTESTNMWZrm,        0 },
     { X86::VPUNPCKHBWZrr,     X86::VPUNPCKHBWZrm,       0 },
     { X86::VPUNPCKHDQZrr,     X86::VPUNPCKHDQZrm,       0 },
     { X86::VPUNPCKHQDQZrr,    X86::VPUNPCKHQDQZrm,      0 },
@@ -2107,12 +2185,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPUNPCKLWDZrr,     X86::VPUNPCKLWDZrm,       0 },
     { X86::VPXORDZrr,         X86::VPXORDZrm,           0 },
     { X86::VPXORQZrr,         X86::VPXORQZrm,           0 },
+    { X86::VRANGEPDZrri,      X86::VRANGEPDZrmi,        0 },
+    { X86::VRANGEPSZrri,      X86::VRANGEPSZrmi,        0 },
+    { X86::VRANGESDZrri,      X86::VRANGESDZrmi,        TB_NO_REVERSE },
+    { X86::VRANGESSZrri,      X86::VRANGESSZrmi,        TB_NO_REVERSE },
+    { X86::VRCP14SDrr,        X86::VRCP14SDrm,          TB_NO_REVERSE },
+    { X86::VRCP14SSrr,        X86::VRCP14SSrm,          TB_NO_REVERSE },
+    { X86::VRCP28SDr,         X86::VRCP28SDm,           TB_NO_REVERSE },
+    { X86::VRCP28SSr,         X86::VRCP28SSm,           TB_NO_REVERSE },
+    { X86::VREDUCESDZrri,     X86::VREDUCESDZrmi,       TB_NO_REVERSE },
+    { X86::VREDUCESSZrri,     X86::VREDUCESSZrmi,       TB_NO_REVERSE },
+    { X86::VRNDSCALESDr,      X86::VRNDSCALESDm,        0 },
+    { X86::VRNDSCALESDr_Int,  X86::VRNDSCALESDm_Int,    TB_NO_REVERSE },
+    { X86::VRNDSCALESSr,      X86::VRNDSCALESSm,        0 },
+    { X86::VRNDSCALESSr_Int,  X86::VRNDSCALESSm_Int,    TB_NO_REVERSE },
+    { X86::VRSQRT14SDrr,      X86::VRSQRT14SDrm,        TB_NO_REVERSE },
+    { X86::VRSQRT14SSrr,      X86::VRSQRT14SSrm,        TB_NO_REVERSE },
+    { X86::VRSQRT28SDr,       X86::VRSQRT28SDm,         TB_NO_REVERSE },
+    { X86::VRSQRT28SSr,       X86::VRSQRT28SSm,         TB_NO_REVERSE },
+    { X86::VSCALEFPDZrr,      X86::VSCALEFPDZrm,        0 },
+    { X86::VSCALEFPSZrr,      X86::VSCALEFPSZrm,        0 },
+    { X86::VSCALEFSDZrr,      X86::VSCALEFSDZrm,        TB_NO_REVERSE },
+    { X86::VSCALEFSSZrr,      X86::VSCALEFSSZrm,        TB_NO_REVERSE },
     { X86::VSHUFF32X4Zrri,    X86::VSHUFF32X4Zrmi,      0 },
     { X86::VSHUFF64X2Zrri,    X86::VSHUFF64X2Zrmi,      0 },
     { X86::VSHUFI64X2Zrri,    X86::VSHUFI64X2Zrmi,      0 },
     { X86::VSHUFI32X4Zrri,    X86::VSHUFI32X4Zrmi,      0 },
     { X86::VSHUFPDZrri,       X86::VSHUFPDZrmi,         0 },
     { X86::VSHUFPSZrri,       X86::VSHUFPSZrmi,         0 },
+    { X86::VSQRTSDZr,         X86::VSQRTSDZm,           0 },
+    { X86::VSQRTSDZr_Int,     X86::VSQRTSDZm_Int,       TB_NO_REVERSE },
+    { X86::VSQRTSSZr,         X86::VSQRTSSZm,           0 },
+    { X86::VSQRTSSZr_Int,     X86::VSQRTSSZm_Int,       TB_NO_REVERSE },
     { X86::VSUBPDZrr,         X86::VSUBPDZrm,           0 },
     { X86::VSUBPSZrr,         X86::VSUBPSZrm,           0 },
     { X86::VSUBSDZrr,         X86::VSUBSDZrm,           0 },
@@ -2299,20 +2403,50 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMINUWZ256rr,     X86::VPMINUWZ256rm,       0 },
     { X86::VPMULDQZ128rr,     X86::VPMULDQZ128rm,       0 },
     { X86::VPMULDQZ256rr,     X86::VPMULDQZ256rm,       0 },
+    { X86::VPMULHRSWZ128rr,   X86::VPMULHRSWZ128rm,     0 },
+    { X86::VPMULHRSWZ256rr,   X86::VPMULHRSWZ256rm,     0 },
+    { X86::VPMULHUWZ128rr,    X86::VPMULHUWZ128rm,      0 },
+    { X86::VPMULHUWZ256rr,    X86::VPMULHUWZ256rm,      0 },
+    { X86::VPMULHWZ128rr,     X86::VPMULHWZ128rm,       0 },
+    { X86::VPMULHWZ256rr,     X86::VPMULHWZ256rm,       0 },
     { X86::VPMULLDZ128rr,     X86::VPMULLDZ128rm,       0 },
     { X86::VPMULLDZ256rr,     X86::VPMULLDZ256rm,       0 },
     { X86::VPMULLQZ128rr,     X86::VPMULLQZ128rm,       0 },
     { X86::VPMULLQZ256rr,     X86::VPMULLQZ256rm,       0 },
     { X86::VPMULLWZ128rr,     X86::VPMULLWZ128rm,       0 },
     { X86::VPMULLWZ256rr,     X86::VPMULLWZ256rm,       0 },
+    { X86::VPMULTISHIFTQBZ128rr, X86::VPMULTISHIFTQBZ128rm, 0 },
+    { X86::VPMULTISHIFTQBZ256rr, X86::VPMULTISHIFTQBZ256rm, 0 },
     { X86::VPMULUDQZ128rr,    X86::VPMULUDQZ128rm,      0 },
     { X86::VPMULUDQZ256rr,    X86::VPMULUDQZ256rm,      0 },
     { X86::VPORDZ128rr,       X86::VPORDZ128rm,         0 },
     { X86::VPORDZ256rr,       X86::VPORDZ256rm,         0 },
     { X86::VPORQZ128rr,       X86::VPORQZ128rm,         0 },
     { X86::VPORQZ256rr,       X86::VPORQZ256rm,         0 },
+    { X86::VPROLVDZ128rr,     X86::VPROLVDZ128rm,       0 },
+    { X86::VPROLVDZ256rr,     X86::VPROLVDZ256rm,       0 },
+    { X86::VPROLVQZ128rr,     X86::VPROLVQZ128rm,       0 },
+    { X86::VPROLVQZ256rr,     X86::VPROLVQZ256rm,       0 },
+    { X86::VPRORVDZ128rr,     X86::VPRORVDZ128rm,       0 },
+    { X86::VPRORVDZ256rr,     X86::VPRORVDZ256rm,       0 },
+    { X86::VPRORVQZ128rr,     X86::VPRORVQZ128rm,       0 },
+    { X86::VPRORVQZ256rr,     X86::VPRORVQZ256rm,       0 },
     { X86::VPSADBWZ128rr,     X86::VPSADBWZ128rm,       0 },
     { X86::VPSADBWZ256rr,     X86::VPSADBWZ256rm,       0 },
+    { X86::VPSHLDDZ128rri,    X86::VPSHLDDZ128rmi,      0 },
+    { X86::VPSHLDDZ256rri,    X86::VPSHLDDZ256rmi,      0 },
+    { X86::VPSHLDQZ128rri,    X86::VPSHLDQZ128rmi,      0 },
+    { X86::VPSHLDQZ256rri,    X86::VPSHLDQZ256rmi,      0 },
+    { X86::VPSHLDWZ128rri,    X86::VPSHLDWZ128rmi,      0 },
+    { X86::VPSHLDWZ256rri,    X86::VPSHLDWZ256rmi,      0 },
+    { X86::VPSHRDDZ128rri,    X86::VPSHRDDZ128rmi,      0 },
+    { X86::VPSHRDDZ256rri,    X86::VPSHRDDZ256rmi,      0 },
+    { X86::VPSHRDQZ128rri,    X86::VPSHRDQZ128rmi,      0 },
+    { X86::VPSHRDQZ256rri,    X86::VPSHRDQZ256rmi,      0 },
+    { X86::VPSHRDWZ128rri,    X86::VPSHRDWZ128rmi,      0 },
+    { X86::VPSHRDWZ256rri,    X86::VPSHRDWZ256rmi,      0 },
+    { X86::VPSHUFBITQMBZ128rr,X86::VPSHUFBITQMBZ128rm,  0 },
+    { X86::VPSHUFBITQMBZ256rr,X86::VPSHUFBITQMBZ256rm,  0 },
     { X86::VPSHUFBZ128rr,     X86::VPSHUFBZ128rm,       0 },
     { X86::VPSHUFBZ256rr,     X86::VPSHUFBZ256rm,       0 },
     { X86::VPSLLDZ128rr,      X86::VPSLLDZ128rm,        0 },
@@ -2367,6 +2501,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPSUBUSWZ256rr,    X86::VPSUBUSWZ256rm,      0 },
     { X86::VPSUBWZ128rr,      X86::VPSUBWZ128rm,        0 },
     { X86::VPSUBWZ256rr,      X86::VPSUBWZ256rm,        0 },
+    { X86::VPTESTMBZ128rr,    X86::VPTESTMBZ128rm,      0 },
+    { X86::VPTESTMBZ256rr,    X86::VPTESTMBZ256rm,      0 },
+    { X86::VPTESTMDZ128rr,    X86::VPTESTMDZ128rm,      0 },
+    { X86::VPTESTMDZ256rr,    X86::VPTESTMDZ256rm,      0 },
+    { X86::VPTESTMQZ128rr,    X86::VPTESTMQZ128rm,      0 },
+    { X86::VPTESTMQZ256rr,    X86::VPTESTMQZ256rm,      0 },
+    { X86::VPTESTMWZ128rr,    X86::VPTESTMWZ128rm,      0 },
+    { X86::VPTESTMWZ256rr,    X86::VPTESTMWZ256rm,      0 },
+    { X86::VPTESTNMBZ128rr,   X86::VPTESTNMBZ128rm,     0 },
+    { X86::VPTESTNMBZ256rr,   X86::VPTESTNMBZ256rm,     0 },
+    { X86::VPTESTNMDZ128rr,   X86::VPTESTNMDZ128rm,     0 },
+    { X86::VPTESTNMDZ256rr,   X86::VPTESTNMDZ256rm,     0 },
+    { X86::VPTESTNMQZ128rr,   X86::VPTESTNMQZ128rm,     0 },
+    { X86::VPTESTNMQZ256rr,   X86::VPTESTNMQZ256rm,     0 },
+    { X86::VPTESTNMWZ128rr,   X86::VPTESTNMWZ128rm,     0 },
+    { X86::VPTESTNMWZ256rr,   X86::VPTESTNMWZ256rm,     0 },
     { X86::VPUNPCKHBWZ128rr,  X86::VPUNPCKHBWZ128rm,    0 },
     { X86::VPUNPCKHBWZ256rr,  X86::VPUNPCKHBWZ256rm,    0 },
     { X86::VPUNPCKHDQZ128rr,  X86::VPUNPCKHDQZ128rm,    0 },
@@ -2387,6 +2537,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPXORDZ256rr,      X86::VPXORDZ256rm,        0 },
     { X86::VPXORQZ128rr,      X86::VPXORQZ128rm,        0 },
     { X86::VPXORQZ256rr,      X86::VPXORQZ256rm,        0 },
+    { X86::VRANGEPDZ128rri,   X86::VRANGEPDZ128rmi,     0 },
+    { X86::VRANGEPDZ256rri,   X86::VRANGEPDZ256rmi,     0 },
+    { X86::VRANGEPSZ128rri,   X86::VRANGEPSZ128rmi,     0 },
+    { X86::VRANGEPSZ256rri,   X86::VRANGEPSZ256rmi,     0 },
+    { X86::VSCALEFPDZ128rr,   X86::VSCALEFPDZ128rm,     0 },
+    { X86::VSCALEFPDZ256rr,   X86::VSCALEFPDZ256rm,     0 },
+    { X86::VSCALEFPSZ128rr,   X86::VSCALEFPSZ128rm,     0 },
+    { X86::VSCALEFPSZ256rr,   X86::VSCALEFPSZ256rm,     0 },
     { X86::VSHUFF32X4Z256rri, X86::VSHUFF32X4Z256rmi,   0 },
     { X86::VSHUFF64X2Z256rri, X86::VSHUFF64X2Z256rmi,   0 },
     { X86::VSHUFI32X4Z256rri, X86::VSHUFI32X4Z256rmi,   0 },
@@ -2547,9 +2705,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::AESENCLASTrr,      X86::AESENCLASTrm,        TB_ALIGN_16 },
     { X86::AESENCrr,          X86::AESENCrm,            TB_ALIGN_16 },
     { X86::VAESDECLASTrr,     X86::VAESDECLASTrm,       0 },
+    { X86::VAESDECLASTYrr,    X86::VAESDECLASTYrm,      0 },
+    { X86::VAESDECLASTZ128rr, X86::VAESDECLASTZ128rm,   0 },
+    { X86::VAESDECLASTZ256rr, X86::VAESDECLASTZ256rm,   0 },
+    { X86::VAESDECLASTZrr,    X86::VAESDECLASTZrm,      0 },
     { X86::VAESDECrr,         X86::VAESDECrm,           0 },
+    { X86::VAESDECYrr,        X86::VAESDECYrm,          0 },
+    { X86::VAESDECZ128rr,     X86::VAESDECZ128rm,       0 },
+    { X86::VAESDECZ256rr,     X86::VAESDECZ256rm,       0 },
+    { X86::VAESDECZrr,        X86::VAESDECZrm,          0 },
     { X86::VAESENCLASTrr,     X86::VAESENCLASTrm,       0 },
+    { X86::VAESENCLASTYrr,    X86::VAESENCLASTYrm,      0 },
+    { X86::VAESENCLASTZ128rr, X86::VAESENCLASTZ128rm,   0 },
+    { X86::VAESENCLASTZ256rr, X86::VAESENCLASTZ256rm,   0 },
+    { X86::VAESENCLASTZrr,    X86::VAESENCLASTZrm,      0 },
     { X86::VAESENCrr,         X86::VAESENCrm,           0 },
+    { X86::VAESENCYrr,        X86::VAESENCYrm,          0 },
+    { X86::VAESENCZ128rr,     X86::VAESENCZ128rm,       0 },
+    { X86::VAESENCZ256rr,     X86::VAESENCZ256rm,       0 },
+    { X86::VAESENCZrr,        X86::VAESENCZrm,          0 },
 
     // SHA foldable instructions
     { X86::SHA1MSG1rr,        X86::SHA1MSG1rm,          TB_ALIGN_16 },
@@ -2558,7 +2732,27 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::SHA1RNDS4rri,      X86::SHA1RNDS4rmi,        TB_ALIGN_16 },
     { X86::SHA256MSG1rr,      X86::SHA256MSG1rm,        TB_ALIGN_16 },
     { X86::SHA256MSG2rr,      X86::SHA256MSG2rm,        TB_ALIGN_16 },
-    { X86::SHA256RNDS2rr,     X86::SHA256RNDS2rm,       TB_ALIGN_16 }
+    { X86::SHA256RNDS2rr,     X86::SHA256RNDS2rm,       TB_ALIGN_16 },
+
+    // GFNI foldable instructions
+    { X86::GF2P8AFFINEINVQBrri,      X86::GF2P8AFFINEINVQBrmi,      TB_ALIGN_16 },
+    { X86::GF2P8AFFINEQBrri,         X86::GF2P8AFFINEQBrmi,         TB_ALIGN_16 },
+    { X86::GF2P8MULBrr,              X86::GF2P8MULBrm,              TB_ALIGN_16 },
+    { X86::VGF2P8AFFINEINVQBrri,     X86::VGF2P8AFFINEINVQBrmi,     0 },
+    { X86::VGF2P8AFFINEINVQBYrri,    X86::VGF2P8AFFINEINVQBYrmi,    0 },
+    { X86::VGF2P8AFFINEINVQBZ128rri, X86::VGF2P8AFFINEINVQBZ128rmi, 0 },
+    { X86::VGF2P8AFFINEINVQBZ256rri, X86::VGF2P8AFFINEINVQBZ256rmi, 0 },
+    { X86::VGF2P8AFFINEINVQBZrri,    X86::VGF2P8AFFINEINVQBZrmi,    0 },
+    { X86::VGF2P8AFFINEQBrri,        X86::VGF2P8AFFINEQBrmi,        0 },
+    { X86::VGF2P8AFFINEQBYrri,       X86::VGF2P8AFFINEQBYrmi,       0 },
+    { X86::VGF2P8AFFINEQBZ128rri,    X86::VGF2P8AFFINEQBZ128rmi,    0 },
+    { X86::VGF2P8AFFINEQBZ256rri,    X86::VGF2P8AFFINEQBZ256rmi,    0 },
+    { X86::VGF2P8AFFINEQBZrri,       X86::VGF2P8AFFINEQBZrmi,       0 },
+    { X86::VGF2P8MULBrr,             X86::VGF2P8MULBrm,             0 },
+    { X86::VGF2P8MULBYrr,            X86::VGF2P8MULBYrm,            0 },
+    { X86::VGF2P8MULBZ128rr,         X86::VGF2P8MULBZ128rm,         0 },
+    { X86::VGF2P8MULBZ256rr,         X86::VGF2P8MULBZ256rm,         0 },
+    { X86::VGF2P8MULBZrr,            X86::VGF2P8MULBZrm,            0 },
   };
 
   for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) {
@@ -2621,6 +2815,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPPERMrrr,             X86::VPPERMrrm,             0 },
 
     // AVX-512 instructions with 3 source operands.
+    { X86::VFIXUPIMMPDZrri,       X86::VFIXUPIMMPDZrmi,       0 },
+    { X86::VFIXUPIMMPSZrri,       X86::VFIXUPIMMPSZrmi,       0 },
+    { X86::VFIXUPIMMSDrri,        X86::VFIXUPIMMSDrmi,        TB_NO_REVERSE },
+    { X86::VFIXUPIMMSSrri,        X86::VFIXUPIMMSSrmi,        TB_NO_REVERSE },
+    { X86::VPDPBUSDSZr,           X86::VPDPBUSDSZm,           0 },
+    { X86::VPDPBUSDZr,            X86::VPDPBUSDZm,            0 },
+    { X86::VPDPWSSDSZr,           X86::VPDPWSSDSZm,           0 },
+    { X86::VPDPWSSDZr,            X86::VPDPWSSDZm,            0 },
     { X86::VPERMI2Brr,            X86::VPERMI2Brm,            0 },
     { X86::VPERMI2Drr,            X86::VPERMI2Drm,            0 },
     { X86::VPERMI2PSrr,           X86::VPERMI2PSrm,           0 },
@@ -2639,6 +2841,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPTERNLOGQZrri,        X86::VPTERNLOGQZrmi,        0 },
 
     // AVX-512VL 256-bit instructions with 3 source operands.
+    { X86::VFIXUPIMMPDZ256rri,    X86::VFIXUPIMMPDZ256rmi,    0 },
+    { X86::VFIXUPIMMPSZ256rri,    X86::VFIXUPIMMPSZ256rmi,    0 },
+    { X86::VPDPBUSDSZ256r,        X86::VPDPBUSDSZ256m,        0 },
+    { X86::VPDPBUSDZ256r,         X86::VPDPBUSDZ256m,         0 },
+    { X86::VPDPWSSDSZ256r,        X86::VPDPWSSDSZ256m,        0 },
+    { X86::VPDPWSSDZ256r,         X86::VPDPWSSDZ256m,         0 },
     { X86::VPERMI2B256rr,         X86::VPERMI2B256rm,         0 },
     { X86::VPERMI2D256rr,         X86::VPERMI2D256rm,         0 },
     { X86::VPERMI2PD256rr,        X86::VPERMI2PD256rm,        0 },
@@ -2657,6 +2865,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPTERNLOGQZ256rri,     X86::VPTERNLOGQZ256rmi,     0 },
 
     // AVX-512VL 128-bit instructions with 3 source operands.
+    { X86::VFIXUPIMMPDZ128rri,    X86::VFIXUPIMMPDZ128rmi,    0 },
+    { X86::VFIXUPIMMPSZ128rri,    X86::VFIXUPIMMPSZ128rmi,    0 },
+    { X86::VPDPBUSDSZ128r,        X86::VPDPBUSDSZ128m,        0 },
+    { X86::VPDPBUSDZ128r,         X86::VPDPBUSDZ128m,         0 },
+    { X86::VPDPWSSDSZ128r,        X86::VPDPWSSDSZ128m,        0 },
+    { X86::VPDPWSSDZ128r,         X86::VPDPWSSDZ128m,         0 },
     { X86::VPERMI2B128rr,         X86::VPERMI2B128rm,         0 },
     { X86::VPERMI2D128rr,         X86::VPERMI2D128rm,         0 },
     { X86::VPERMI2PD128rr,        X86::VPERMI2PD128rm,        0 },

Modified: llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll?rev=334728&r1=334727&r2=334728&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ss_load_fold.ll Thu Jun 14 08:40:31 2018
@@ -214,49 +214,26 @@ define <4 x float> @test4(<4 x float> %A
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    retq
 ;
-; X32_AVX1-LABEL: test4:
-; X32_AVX1:       ## %bb.0:
-; X32_AVX1-NEXT:    subl $28, %esp
-; X32_AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32_AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32_AVX1-NEXT:    vmovaps %xmm0, (%esp) ## 16-byte Spill
-; X32_AVX1-NEXT:    calll _f
-; X32_AVX1-NEXT:    vroundss $4, (%esp), %xmm0, %xmm0 ## 16-byte Folded Reload
-; X32_AVX1-NEXT:    addl $28, %esp
-; X32_AVX1-NEXT:    retl
+; X32_AVX-LABEL: test4:
+; X32_AVX:       ## %bb.0:
+; X32_AVX-NEXT:    subl $28, %esp
+; X32_AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32_AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32_AVX-NEXT:    vmovaps %xmm0, (%esp) ## 16-byte Spill
+; X32_AVX-NEXT:    calll _f
+; X32_AVX-NEXT:    vroundss $4, (%esp), %xmm0, %xmm0 ## 16-byte Folded Reload
+; X32_AVX-NEXT:    addl $28, %esp
+; X32_AVX-NEXT:    retl
 ;
-; X64_AVX1-LABEL: test4:
-; X64_AVX1:       ## %bb.0:
-; X64_AVX1-NEXT:    subq $24, %rsp
-; X64_AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64_AVX1-NEXT:    vmovaps %xmm0, (%rsp) ## 16-byte Spill
-; X64_AVX1-NEXT:    callq _f
-; X64_AVX1-NEXT:    vroundss $4, (%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload
-; X64_AVX1-NEXT:    addq $24, %rsp
-; X64_AVX1-NEXT:    retq
-;
-; X32_AVX512-LABEL: test4:
-; X32_AVX512:       ## %bb.0:
-; X32_AVX512-NEXT:    subl $28, %esp
-; X32_AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32_AVX512-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32_AVX512-NEXT:    vmovaps %xmm0, (%esp) ## 16-byte Spill
-; X32_AVX512-NEXT:    calll _f
-; X32_AVX512-NEXT:    vmovaps (%esp), %xmm1 ## 16-byte Reload
-; X32_AVX512-NEXT:    vroundss $4, %xmm1, %xmm0, %xmm0
-; X32_AVX512-NEXT:    addl $28, %esp
-; X32_AVX512-NEXT:    retl
-;
-; X64_AVX512-LABEL: test4:
-; X64_AVX512:       ## %bb.0:
-; X64_AVX512-NEXT:    subq $24, %rsp
-; X64_AVX512-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64_AVX512-NEXT:    vmovaps %xmm0, (%rsp) ## 16-byte Spill
-; X64_AVX512-NEXT:    callq _f
-; X64_AVX512-NEXT:    vmovaps (%rsp), %xmm1 ## 16-byte Reload
-; X64_AVX512-NEXT:    vroundss $4, %xmm1, %xmm0, %xmm0
-; X64_AVX512-NEXT:    addq $24, %rsp
-; X64_AVX512-NEXT:    retq
+; X64_AVX-LABEL: test4:
+; X64_AVX:       ## %bb.0:
+; X64_AVX-NEXT:    subq $24, %rsp
+; X64_AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64_AVX-NEXT:    vmovaps %xmm0, (%rsp) ## 16-byte Spill
+; X64_AVX-NEXT:    callq _f
+; X64_AVX-NEXT:    vroundss $4, (%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload
+; X64_AVX-NEXT:    addq $24, %rsp
+; X64_AVX-NEXT:    retq
   %a = load float , float *%b
   %B = insertelement <4 x float> undef, float %a, i32 0
   %q = call <4 x float> @f()
