[llvm] [WIP][X86][tablgen] Auto-gen broadcast tables (PR #73654)

Shengchen Kan via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 30 01:26:11 PST 2023


https://github.com/KanRobert updated https://github.com/llvm/llvm-project/pull/73654

>From 5bb88924a488b0447209aa68205bf380e1955485 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Tue, 28 Nov 2023 22:56:08 +0800
Subject: [PATCH] [X86][tablgen] Auto-gen broadcast tables

---
 .../include/llvm/Support/X86FoldTablesUtils.h |   7 +-
 llvm/lib/Target/X86/X86InstrAVX512.td         |   2 +
 llvm/lib/Target/X86/X86InstrFoldTables.cpp    | 296 +-----------------
 llvm/utils/TableGen/X86FoldTablesEmitter.cpp  | 192 ++++++++++--
 4 files changed, 177 insertions(+), 320 deletions(-)

diff --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h
index 1cce9cdaf65f8d6..ed244febc38d3a5 100644
--- a/llvm/include/llvm/Support/X86FoldTablesUtils.h
+++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h
@@ -44,15 +44,16 @@ enum {
   TB_ALIGN_MASK = 0x7 << TB_ALIGN_SHIFT,
 
   // Broadcast type.
-  // (stored in bits 12 - 13)
+  // (stored in bits 12 - 14)
   TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3,
   TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT,
   TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT,
   TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT,
   TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_SH = 4 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT,
 
-  // Unused bits 14-15
+  // Unused bit 15
 };
 } // namespace llvm
 #endif // LLVM_SUPPORT_X86FOLDTABLESUTILS_H
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index f325f47d46464c3..0514f0d19506707 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12875,8 +12875,10 @@ multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _
   }
 }
 
+let ExeDomain = SSEPackedInt in {
 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
+}
 
 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                              X86SchedWriteWidths sched,
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 7a3611b90da9895..c44d77460f27df3 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -23,300 +23,6 @@ using namespace llvm;
 // are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which
 // makes sorting these tables a simple matter of alphabetizing the table.
 #include "X86GenFoldTables.inc"
-static const X86FoldTableEntry BroadcastTable2[] = {
-  { X86::VADDPDZ128rr,   X86::VADDPDZ128rmb,   TB_BCAST_SD },
-  { X86::VADDPDZ256rr,   X86::VADDPDZ256rmb,   TB_BCAST_SD },
-  { X86::VADDPDZrr,      X86::VADDPDZrmb,      TB_BCAST_SD },
-  { X86::VADDPSZ128rr,   X86::VADDPSZ128rmb,   TB_BCAST_SS },
-  { X86::VADDPSZ256rr,   X86::VADDPSZ256rmb,   TB_BCAST_SS },
-  { X86::VADDPSZrr,      X86::VADDPSZrmb,      TB_BCAST_SS },
-  { X86::VANDNPDZ128rr,  X86::VANDNPDZ128rmb,  TB_BCAST_SD },
-  { X86::VANDNPDZ256rr,  X86::VANDNPDZ256rmb,  TB_BCAST_SD },
-  { X86::VANDNPDZrr,     X86::VANDNPDZrmb,     TB_BCAST_SD },
-  { X86::VANDNPSZ128rr,  X86::VANDNPSZ128rmb,  TB_BCAST_SS },
-  { X86::VANDNPSZ256rr,  X86::VANDNPSZ256rmb,  TB_BCAST_SS },
-  { X86::VANDNPSZrr,     X86::VANDNPSZrmb,     TB_BCAST_SS },
-  { X86::VANDPDZ128rr,   X86::VANDPDZ128rmb,   TB_BCAST_SD },
-  { X86::VANDPDZ256rr,   X86::VANDPDZ256rmb,   TB_BCAST_SD },
-  { X86::VANDPDZrr,      X86::VANDPDZrmb,      TB_BCAST_SD },
-  { X86::VANDPSZ128rr,   X86::VANDPSZ128rmb,   TB_BCAST_SS },
-  { X86::VANDPSZ256rr,   X86::VANDPSZ256rmb,   TB_BCAST_SS },
-  { X86::VANDPSZrr,      X86::VANDPSZrmb,      TB_BCAST_SS },
-  { X86::VCMPPDZ128rri,  X86::VCMPPDZ128rmbi,  TB_BCAST_SD },
-  { X86::VCMPPDZ256rri,  X86::VCMPPDZ256rmbi,  TB_BCAST_SD },
-  { X86::VCMPPDZrri,     X86::VCMPPDZrmbi,     TB_BCAST_SD },
-  { X86::VCMPPSZ128rri,  X86::VCMPPSZ128rmbi,  TB_BCAST_SS },
-  { X86::VCMPPSZ256rri,  X86::VCMPPSZ256rmbi,  TB_BCAST_SS },
-  { X86::VCMPPSZrri,     X86::VCMPPSZrmbi,     TB_BCAST_SS },
-  { X86::VDIVPDZ128rr,   X86::VDIVPDZ128rmb,   TB_BCAST_SD },
-  { X86::VDIVPDZ256rr,   X86::VDIVPDZ256rmb,   TB_BCAST_SD },
-  { X86::VDIVPDZrr,      X86::VDIVPDZrmb,      TB_BCAST_SD },
-  { X86::VDIVPSZ128rr,   X86::VDIVPSZ128rmb,   TB_BCAST_SS },
-  { X86::VDIVPSZ256rr,   X86::VDIVPSZ256rmb,   TB_BCAST_SS },
-  { X86::VDIVPSZrr,      X86::VDIVPSZrmb,      TB_BCAST_SS },
-  { X86::VMAXCPDZ128rr,  X86::VMAXCPDZ128rmb,  TB_BCAST_SD },
-  { X86::VMAXCPDZ256rr,  X86::VMAXCPDZ256rmb,  TB_BCAST_SD },
-  { X86::VMAXCPDZrr,     X86::VMAXCPDZrmb,     TB_BCAST_SD },
-  { X86::VMAXCPSZ128rr,  X86::VMAXCPSZ128rmb,  TB_BCAST_SS },
-  { X86::VMAXCPSZ256rr,  X86::VMAXCPSZ256rmb,  TB_BCAST_SS },
-  { X86::VMAXCPSZrr,     X86::VMAXCPSZrmb,     TB_BCAST_SS },
-  { X86::VMAXPDZ128rr,   X86::VMAXPDZ128rmb,   TB_BCAST_SD },
-  { X86::VMAXPDZ256rr,   X86::VMAXPDZ256rmb,   TB_BCAST_SD },
-  { X86::VMAXPDZrr,      X86::VMAXPDZrmb,      TB_BCAST_SD },
-  { X86::VMAXPSZ128rr,   X86::VMAXPSZ128rmb,   TB_BCAST_SS },
-  { X86::VMAXPSZ256rr,   X86::VMAXPSZ256rmb,   TB_BCAST_SS },
-  { X86::VMAXPSZrr,      X86::VMAXPSZrmb,      TB_BCAST_SS },
-  { X86::VMINCPDZ128rr,  X86::VMINCPDZ128rmb,  TB_BCAST_SD },
-  { X86::VMINCPDZ256rr,  X86::VMINCPDZ256rmb,  TB_BCAST_SD },
-  { X86::VMINCPDZrr,     X86::VMINCPDZrmb,     TB_BCAST_SD },
-  { X86::VMINCPSZ128rr,  X86::VMINCPSZ128rmb,  TB_BCAST_SS },
-  { X86::VMINCPSZ256rr,  X86::VMINCPSZ256rmb,  TB_BCAST_SS },
-  { X86::VMINCPSZrr,     X86::VMINCPSZrmb,     TB_BCAST_SS },
-  { X86::VMINPDZ128rr,   X86::VMINPDZ128rmb,   TB_BCAST_SD },
-  { X86::VMINPDZ256rr,   X86::VMINPDZ256rmb,   TB_BCAST_SD },
-  { X86::VMINPDZrr,      X86::VMINPDZrmb,      TB_BCAST_SD },
-  { X86::VMINPSZ128rr,   X86::VMINPSZ128rmb,   TB_BCAST_SS },
-  { X86::VMINPSZ256rr,   X86::VMINPSZ256rmb,   TB_BCAST_SS },
-  { X86::VMINPSZrr,      X86::VMINPSZrmb,      TB_BCAST_SS },
-  { X86::VMULPDZ128rr,   X86::VMULPDZ128rmb,   TB_BCAST_SD },
-  { X86::VMULPDZ256rr,   X86::VMULPDZ256rmb,   TB_BCAST_SD },
-  { X86::VMULPDZrr,      X86::VMULPDZrmb,      TB_BCAST_SD },
-  { X86::VMULPSZ128rr,   X86::VMULPSZ128rmb,   TB_BCAST_SS },
-  { X86::VMULPSZ256rr,   X86::VMULPSZ256rmb,   TB_BCAST_SS },
-  { X86::VMULPSZrr,      X86::VMULPSZrmb,      TB_BCAST_SS },
-  { X86::VORPDZ128rr,    X86::VORPDZ128rmb,    TB_BCAST_SD },
-  { X86::VORPDZ256rr,    X86::VORPDZ256rmb,    TB_BCAST_SD },
-  { X86::VORPDZrr,       X86::VORPDZrmb,       TB_BCAST_SD },
-  { X86::VORPSZ128rr,    X86::VORPSZ128rmb,    TB_BCAST_SS },
-  { X86::VORPSZ256rr,    X86::VORPSZ256rmb,    TB_BCAST_SS },
-  { X86::VORPSZrr,       X86::VORPSZrmb,       TB_BCAST_SS },
-  { X86::VPADDDZ128rr,   X86::VPADDDZ128rmb,   TB_BCAST_D },
-  { X86::VPADDDZ256rr,   X86::VPADDDZ256rmb,   TB_BCAST_D },
-  { X86::VPADDDZrr,      X86::VPADDDZrmb,      TB_BCAST_D },
-  { X86::VPADDQZ128rr,   X86::VPADDQZ128rmb,   TB_BCAST_Q },
-  { X86::VPADDQZ256rr,   X86::VPADDQZ256rmb,   TB_BCAST_Q },
-  { X86::VPADDQZrr,      X86::VPADDQZrmb,      TB_BCAST_Q },
-  { X86::VPANDDZ128rr,   X86::VPANDDZ128rmb,   TB_BCAST_D },
-  { X86::VPANDDZ256rr,   X86::VPANDDZ256rmb,   TB_BCAST_D },
-  { X86::VPANDDZrr,      X86::VPANDDZrmb,      TB_BCAST_D },
-  { X86::VPANDNDZ128rr,  X86::VPANDNDZ128rmb,  TB_BCAST_D },
-  { X86::VPANDNDZ256rr,  X86::VPANDNDZ256rmb,  TB_BCAST_D },
-  { X86::VPANDNDZrr,     X86::VPANDNDZrmb,     TB_BCAST_D },
-  { X86::VPANDNQZ128rr,  X86::VPANDNQZ128rmb,  TB_BCAST_Q },
-  { X86::VPANDNQZ256rr,  X86::VPANDNQZ256rmb,  TB_BCAST_Q },
-  { X86::VPANDNQZrr,     X86::VPANDNQZrmb,     TB_BCAST_Q },
-  { X86::VPANDQZ128rr,   X86::VPANDQZ128rmb,   TB_BCAST_Q },
-  { X86::VPANDQZ256rr,   X86::VPANDQZ256rmb,   TB_BCAST_Q },
-  { X86::VPANDQZrr,      X86::VPANDQZrmb,      TB_BCAST_Q },
-  { X86::VPCMPDZ128rri,  X86::VPCMPDZ128rmib,  TB_BCAST_D },
-  { X86::VPCMPDZ256rri,  X86::VPCMPDZ256rmib,  TB_BCAST_D },
-  { X86::VPCMPDZrri,     X86::VPCMPDZrmib,     TB_BCAST_D },
-  { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rmb, TB_BCAST_D },
-  { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rmb, TB_BCAST_D },
-  { X86::VPCMPEQDZrr,    X86::VPCMPEQDZrmb,    TB_BCAST_D },
-  { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rmb, TB_BCAST_Q },
-  { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rmb, TB_BCAST_Q },
-  { X86::VPCMPEQQZrr,    X86::VPCMPEQQZrmb,    TB_BCAST_Q },
-  { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rmb, TB_BCAST_D },
-  { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rmb, TB_BCAST_D },
-  { X86::VPCMPGTDZrr,    X86::VPCMPGTDZrmb,    TB_BCAST_D },
-  { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rmb, TB_BCAST_Q },
-  { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rmb, TB_BCAST_Q },
-  { X86::VPCMPGTQZrr,    X86::VPCMPGTQZrmb,    TB_BCAST_Q },
-  { X86::VPCMPQZ128rri,  X86::VPCMPQZ128rmib,  TB_BCAST_Q },
-  { X86::VPCMPQZ256rri,  X86::VPCMPQZ256rmib,  TB_BCAST_Q },
-  { X86::VPCMPQZrri,     X86::VPCMPQZrmib,     TB_BCAST_Q },
-  { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmib, TB_BCAST_D },
-  { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmib, TB_BCAST_D },
-  { X86::VPCMPUDZrri,    X86::VPCMPUDZrmib,    TB_BCAST_D },
-  { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmib, TB_BCAST_Q },
-  { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmib, TB_BCAST_Q },
-  { X86::VPCMPUQZrri,    X86::VPCMPUQZrmib,    TB_BCAST_Q },
-  { X86::VPMAXSDZ128rr,  X86::VPMAXSDZ128rmb,  TB_BCAST_D },
-  { X86::VPMAXSDZ256rr,  X86::VPMAXSDZ256rmb,  TB_BCAST_D },
-  { X86::VPMAXSDZrr,     X86::VPMAXSDZrmb,     TB_BCAST_D },
-  { X86::VPMAXSQZ128rr,  X86::VPMAXSQZ128rmb,  TB_BCAST_Q },
-  { X86::VPMAXSQZ256rr,  X86::VPMAXSQZ256rmb,  TB_BCAST_Q },
-  { X86::VPMAXSQZrr,     X86::VPMAXSQZrmb,     TB_BCAST_Q },
-  { X86::VPMAXUDZ128rr,  X86::VPMAXUDZ128rmb,  TB_BCAST_D },
-  { X86::VPMAXUDZ256rr,  X86::VPMAXUDZ256rmb,  TB_BCAST_D },
-  { X86::VPMAXUDZrr,     X86::VPMAXUDZrmb,     TB_BCAST_D },
-  { X86::VPMAXUQZ128rr,  X86::VPMAXUQZ128rmb,  TB_BCAST_Q },
-  { X86::VPMAXUQZ256rr,  X86::VPMAXUQZ256rmb,  TB_BCAST_Q },
-  { X86::VPMAXUQZrr,     X86::VPMAXUQZrmb,     TB_BCAST_Q },
-  { X86::VPMINSDZ128rr,  X86::VPMINSDZ128rmb,  TB_BCAST_D },
-  { X86::VPMINSDZ256rr,  X86::VPMINSDZ256rmb,  TB_BCAST_D },
-  { X86::VPMINSDZrr,     X86::VPMINSDZrmb,     TB_BCAST_D },
-  { X86::VPMINSQZ128rr,  X86::VPMINSQZ128rmb,  TB_BCAST_Q },
-  { X86::VPMINSQZ256rr,  X86::VPMINSQZ256rmb,  TB_BCAST_Q },
-  { X86::VPMINSQZrr,     X86::VPMINSQZrmb,     TB_BCAST_Q },
-  { X86::VPMINUDZ128rr,  X86::VPMINUDZ128rmb,  TB_BCAST_D },
-  { X86::VPMINUDZ256rr,  X86::VPMINUDZ256rmb,  TB_BCAST_D },
-  { X86::VPMINUDZrr,     X86::VPMINUDZrmb,     TB_BCAST_D },
-  { X86::VPMINUQZ128rr,  X86::VPMINUQZ128rmb,  TB_BCAST_Q },
-  { X86::VPMINUQZ256rr,  X86::VPMINUQZ256rmb,  TB_BCAST_Q },
-  { X86::VPMINUQZrr,     X86::VPMINUQZrmb,     TB_BCAST_Q },
-  { X86::VPMULLDZ128rr,  X86::VPMULLDZ128rmb,  TB_BCAST_D },
-  { X86::VPMULLDZ256rr,  X86::VPMULLDZ256rmb,  TB_BCAST_D },
-  { X86::VPMULLDZrr,     X86::VPMULLDZrmb,     TB_BCAST_D },
-  { X86::VPMULLQZ128rr,  X86::VPMULLQZ128rmb,  TB_BCAST_Q },
-  { X86::VPMULLQZ256rr,  X86::VPMULLQZ256rmb,  TB_BCAST_Q },
-  { X86::VPMULLQZrr,     X86::VPMULLQZrmb,     TB_BCAST_Q },
-  { X86::VPORDZ128rr,    X86::VPORDZ128rmb,    TB_BCAST_D },
-  { X86::VPORDZ256rr,    X86::VPORDZ256rmb,    TB_BCAST_D },
-  { X86::VPORDZrr,       X86::VPORDZrmb,       TB_BCAST_D },
-  { X86::VPORQZ128rr,    X86::VPORQZ128rmb,    TB_BCAST_Q },
-  { X86::VPORQZ256rr,    X86::VPORQZ256rmb,    TB_BCAST_Q },
-  { X86::VPORQZrr,       X86::VPORQZrmb,       TB_BCAST_Q },
-  { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D },
-  { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D },
-  { X86::VPTESTMDZrr,    X86::VPTESTMDZrmb,    TB_BCAST_D },
-  { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q },
-  { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q },
-  { X86::VPTESTMQZrr,    X86::VPTESTMQZrmb,    TB_BCAST_Q },
-  { X86::VPTESTNMDZ128rr,X86::VPTESTNMDZ128rmb,TB_BCAST_D },
-  { X86::VPTESTNMDZ256rr,X86::VPTESTNMDZ256rmb,TB_BCAST_D },
-  { X86::VPTESTNMDZrr,   X86::VPTESTNMDZrmb,   TB_BCAST_D },
-  { X86::VPTESTNMQZ128rr,X86::VPTESTNMQZ128rmb,TB_BCAST_Q },
-  { X86::VPTESTNMQZ256rr,X86::VPTESTNMQZ256rmb,TB_BCAST_Q },
-  { X86::VPTESTNMQZrr,   X86::VPTESTNMQZrmb,   TB_BCAST_Q },
-  { X86::VPXORDZ128rr,   X86::VPXORDZ128rmb,   TB_BCAST_D },
-  { X86::VPXORDZ256rr,   X86::VPXORDZ256rmb,   TB_BCAST_D },
-  { X86::VPXORDZrr,      X86::VPXORDZrmb,      TB_BCAST_D },
-  { X86::VPXORQZ128rr,   X86::VPXORQZ128rmb,   TB_BCAST_Q },
-  { X86::VPXORQZ256rr,   X86::VPXORQZ256rmb,   TB_BCAST_Q },
-  { X86::VPXORQZrr,      X86::VPXORQZrmb,      TB_BCAST_Q },
-  { X86::VSUBPDZ128rr,   X86::VSUBPDZ128rmb,   TB_BCAST_SD },
-  { X86::VSUBPDZ256rr,   X86::VSUBPDZ256rmb,   TB_BCAST_SD },
-  { X86::VSUBPDZrr,      X86::VSUBPDZrmb,      TB_BCAST_SD },
-  { X86::VSUBPSZ128rr,   X86::VSUBPSZ128rmb,   TB_BCAST_SS },
-  { X86::VSUBPSZ256rr,   X86::VSUBPSZ256rmb,   TB_BCAST_SS },
-  { X86::VSUBPSZrr,      X86::VSUBPSZrmb,      TB_BCAST_SS },
-  { X86::VXORPDZ128rr,   X86::VXORPDZ128rmb,   TB_BCAST_SD },
-  { X86::VXORPDZ256rr,   X86::VXORPDZ256rmb,   TB_BCAST_SD },
-  { X86::VXORPDZrr,      X86::VXORPDZrmb,      TB_BCAST_SD },
-  { X86::VXORPSZ128rr,   X86::VXORPSZ128rmb,   TB_BCAST_SS },
-  { X86::VXORPSZ256rr,   X86::VXORPSZ256rmb,   TB_BCAST_SS },
-  { X86::VXORPSZrr,      X86::VXORPSZrmb,      TB_BCAST_SS },
-};
-
-static const X86FoldTableEntry BroadcastTable3[] = {
-  { X86::VFMADD132PDZ128r,     X86::VFMADD132PDZ128mb,    TB_BCAST_SD },
-  { X86::VFMADD132PDZ256r,     X86::VFMADD132PDZ256mb,    TB_BCAST_SD },
-  { X86::VFMADD132PDZr,        X86::VFMADD132PDZmb,       TB_BCAST_SD },
-  { X86::VFMADD132PSZ128r,     X86::VFMADD132PSZ128mb,    TB_BCAST_SS },
-  { X86::VFMADD132PSZ256r,     X86::VFMADD132PSZ256mb,    TB_BCAST_SS },
-  { X86::VFMADD132PSZr,        X86::VFMADD132PSZmb,       TB_BCAST_SS },
-  { X86::VFMADD213PDZ128r,     X86::VFMADD213PDZ128mb,    TB_BCAST_SD },
-  { X86::VFMADD213PDZ256r,     X86::VFMADD213PDZ256mb,    TB_BCAST_SD },
-  { X86::VFMADD213PDZr,        X86::VFMADD213PDZmb,       TB_BCAST_SD },
-  { X86::VFMADD213PSZ128r,     X86::VFMADD213PSZ128mb,    TB_BCAST_SS },
-  { X86::VFMADD213PSZ256r,     X86::VFMADD213PSZ256mb,    TB_BCAST_SS },
-  { X86::VFMADD213PSZr,        X86::VFMADD213PSZmb,       TB_BCAST_SS },
-  { X86::VFMADD231PDZ128r,     X86::VFMADD231PDZ128mb,    TB_BCAST_SD },
-  { X86::VFMADD231PDZ256r,     X86::VFMADD231PDZ256mb,    TB_BCAST_SD },
-  { X86::VFMADD231PDZr,        X86::VFMADD231PDZmb,       TB_BCAST_SD },
-  { X86::VFMADD231PSZ128r,     X86::VFMADD231PSZ128mb,    TB_BCAST_SS },
-  { X86::VFMADD231PSZ256r,     X86::VFMADD231PSZ256mb,    TB_BCAST_SS },
-  { X86::VFMADD231PSZr,        X86::VFMADD231PSZmb,       TB_BCAST_SS },
-  { X86::VFMADDSUB132PDZ128r,  X86::VFMADDSUB132PDZ128mb, TB_BCAST_SD },
-  { X86::VFMADDSUB132PDZ256r,  X86::VFMADDSUB132PDZ256mb, TB_BCAST_SD },
-  { X86::VFMADDSUB132PDZr,     X86::VFMADDSUB132PDZmb,    TB_BCAST_SD },
-  { X86::VFMADDSUB132PSZ128r,  X86::VFMADDSUB132PSZ128mb, TB_BCAST_SS },
-  { X86::VFMADDSUB132PSZ256r,  X86::VFMADDSUB132PSZ256mb, TB_BCAST_SS },
-  { X86::VFMADDSUB132PSZr,     X86::VFMADDSUB132PSZmb,    TB_BCAST_SS },
-  { X86::VFMADDSUB213PDZ128r,  X86::VFMADDSUB213PDZ128mb, TB_BCAST_SD },
-  { X86::VFMADDSUB213PDZ256r,  X86::VFMADDSUB213PDZ256mb, TB_BCAST_SD },
-  { X86::VFMADDSUB213PDZr,     X86::VFMADDSUB213PDZmb,    TB_BCAST_SD },
-  { X86::VFMADDSUB213PSZ128r,  X86::VFMADDSUB213PSZ128mb, TB_BCAST_SS },
-  { X86::VFMADDSUB213PSZ256r,  X86::VFMADDSUB213PSZ256mb, TB_BCAST_SS },
-  { X86::VFMADDSUB213PSZr,     X86::VFMADDSUB213PSZmb,    TB_BCAST_SS },
-  { X86::VFMADDSUB231PDZ128r,  X86::VFMADDSUB231PDZ128mb, TB_BCAST_SD },
-  { X86::VFMADDSUB231PDZ256r,  X86::VFMADDSUB231PDZ256mb, TB_BCAST_SD },
-  { X86::VFMADDSUB231PDZr,     X86::VFMADDSUB231PDZmb,    TB_BCAST_SD },
-  { X86::VFMADDSUB231PSZ128r,  X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS },
-  { X86::VFMADDSUB231PSZ256r,  X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS },
-  { X86::VFMADDSUB231PSZr,     X86::VFMADDSUB231PSZmb,    TB_BCAST_SS },
-  { X86::VFMSUB132PDZ128r,     X86::VFMSUB132PDZ128mb,    TB_BCAST_SD },
-  { X86::VFMSUB132PDZ256r,     X86::VFMSUB132PDZ256mb,    TB_BCAST_SD },
-  { X86::VFMSUB132PDZr,        X86::VFMSUB132PDZmb,       TB_BCAST_SD },
-  { X86::VFMSUB132PSZ128r,     X86::VFMSUB132PSZ128mb,    TB_BCAST_SS },
-  { X86::VFMSUB132PSZ256r,     X86::VFMSUB132PSZ256mb,    TB_BCAST_SS },
-  { X86::VFMSUB132PSZr,        X86::VFMSUB132PSZmb,       TB_BCAST_SS },
-  { X86::VFMSUB213PDZ128r,     X86::VFMSUB213PDZ128mb,    TB_BCAST_SD },
-  { X86::VFMSUB213PDZ256r,     X86::VFMSUB213PDZ256mb,    TB_BCAST_SD },
-  { X86::VFMSUB213PDZr,        X86::VFMSUB213PDZmb,       TB_BCAST_SD },
-  { X86::VFMSUB213PSZ128r,     X86::VFMSUB213PSZ128mb,    TB_BCAST_SS },
-  { X86::VFMSUB213PSZ256r,     X86::VFMSUB213PSZ256mb,    TB_BCAST_SS },
-  { X86::VFMSUB213PSZr,        X86::VFMSUB213PSZmb,       TB_BCAST_SS },
-  { X86::VFMSUB231PDZ128r,     X86::VFMSUB231PDZ128mb,    TB_BCAST_SD },
-  { X86::VFMSUB231PDZ256r,     X86::VFMSUB231PDZ256mb,    TB_BCAST_SD },
-  { X86::VFMSUB231PDZr,        X86::VFMSUB231PDZmb,       TB_BCAST_SD },
-  { X86::VFMSUB231PSZ128r,     X86::VFMSUB231PSZ128mb,    TB_BCAST_SS },
-  { X86::VFMSUB231PSZ256r,     X86::VFMSUB231PSZ256mb,    TB_BCAST_SS },
-  { X86::VFMSUB231PSZr,        X86::VFMSUB231PSZmb,       TB_BCAST_SS },
-  { X86::VFMSUBADD132PDZ128r,  X86::VFMSUBADD132PDZ128mb, TB_BCAST_SD },
-  { X86::VFMSUBADD132PDZ256r,  X86::VFMSUBADD132PDZ256mb, TB_BCAST_SD },
-  { X86::VFMSUBADD132PDZr,     X86::VFMSUBADD132PDZmb,    TB_BCAST_SD },
-  { X86::VFMSUBADD132PSZ128r,  X86::VFMSUBADD132PSZ128mb, TB_BCAST_SS },
-  { X86::VFMSUBADD132PSZ256r,  X86::VFMSUBADD132PSZ256mb, TB_BCAST_SS },
-  { X86::VFMSUBADD132PSZr,     X86::VFMSUBADD132PSZmb,    TB_BCAST_SS },
-  { X86::VFMSUBADD213PDZ128r,  X86::VFMSUBADD213PDZ128mb, TB_BCAST_SD },
-  { X86::VFMSUBADD213PDZ256r,  X86::VFMSUBADD213PDZ256mb, TB_BCAST_SD },
-  { X86::VFMSUBADD213PDZr,     X86::VFMSUBADD213PDZmb,    TB_BCAST_SD },
-  { X86::VFMSUBADD213PSZ128r,  X86::VFMSUBADD213PSZ128mb, TB_BCAST_SS },
-  { X86::VFMSUBADD213PSZ256r,  X86::VFMSUBADD213PSZ256mb, TB_BCAST_SS },
-  { X86::VFMSUBADD213PSZr,     X86::VFMSUBADD213PSZmb,    TB_BCAST_SS },
-  { X86::VFMSUBADD231PDZ128r,  X86::VFMSUBADD231PDZ128mb, TB_BCAST_SD },
-  { X86::VFMSUBADD231PDZ256r,  X86::VFMSUBADD231PDZ256mb, TB_BCAST_SD },
-  { X86::VFMSUBADD231PDZr,     X86::VFMSUBADD231PDZmb,    TB_BCAST_SD },
-  { X86::VFMSUBADD231PSZ128r,  X86::VFMSUBADD231PSZ128mb, TB_BCAST_SS },
-  { X86::VFMSUBADD231PSZ256r,  X86::VFMSUBADD231PSZ256mb, TB_BCAST_SS },
-  { X86::VFMSUBADD231PSZr,     X86::VFMSUBADD231PSZmb,    TB_BCAST_SS },
-  { X86::VFNMADD132PDZ128r,    X86::VFNMADD132PDZ128mb,   TB_BCAST_SD },
-  { X86::VFNMADD132PDZ256r,    X86::VFNMADD132PDZ256mb,   TB_BCAST_SD },
-  { X86::VFNMADD132PDZr,       X86::VFNMADD132PDZmb,      TB_BCAST_SD },
-  { X86::VFNMADD132PSZ128r,    X86::VFNMADD132PSZ128mb,   TB_BCAST_SS },
-  { X86::VFNMADD132PSZ256r,    X86::VFNMADD132PSZ256mb,   TB_BCAST_SS },
-  { X86::VFNMADD132PSZr,       X86::VFNMADD132PSZmb,      TB_BCAST_SS },
-  { X86::VFNMADD213PDZ128r,    X86::VFNMADD213PDZ128mb,   TB_BCAST_SD },
-  { X86::VFNMADD213PDZ256r,    X86::VFNMADD213PDZ256mb,   TB_BCAST_SD },
-  { X86::VFNMADD213PDZr,       X86::VFNMADD213PDZmb,      TB_BCAST_SD },
-  { X86::VFNMADD213PSZ128r,    X86::VFNMADD213PSZ128mb,   TB_BCAST_SS },
-  { X86::VFNMADD213PSZ256r,    X86::VFNMADD213PSZ256mb,   TB_BCAST_SS },
-  { X86::VFNMADD213PSZr,       X86::VFNMADD213PSZmb,      TB_BCAST_SS },
-  { X86::VFNMADD231PDZ128r,    X86::VFNMADD231PDZ128mb,   TB_BCAST_SD },
-  { X86::VFNMADD231PDZ256r,    X86::VFNMADD231PDZ256mb,   TB_BCAST_SD },
-  { X86::VFNMADD231PDZr,       X86::VFNMADD231PDZmb,      TB_BCAST_SD },
-  { X86::VFNMADD231PSZ128r,    X86::VFNMADD231PSZ128mb,   TB_BCAST_SS },
-  { X86::VFNMADD231PSZ256r,    X86::VFNMADD231PSZ256mb,   TB_BCAST_SS },
-  { X86::VFNMADD231PSZr,       X86::VFNMADD231PSZmb,      TB_BCAST_SS },
-  { X86::VFNMSUB132PDZ128r,    X86::VFNMSUB132PDZ128mb,   TB_BCAST_SD },
-  { X86::VFNMSUB132PDZ256r,    X86::VFNMSUB132PDZ256mb,   TB_BCAST_SD },
-  { X86::VFNMSUB132PDZr,       X86::VFNMSUB132PDZmb,      TB_BCAST_SD },
-  { X86::VFNMSUB132PSZ128r,    X86::VFNMSUB132PSZ128mb,   TB_BCAST_SS },
-  { X86::VFNMSUB132PSZ256r,    X86::VFNMSUB132PSZ256mb,   TB_BCAST_SS },
-  { X86::VFNMSUB132PSZr,       X86::VFNMSUB132PSZmb,      TB_BCAST_SS },
-  { X86::VFNMSUB213PDZ128r,    X86::VFNMSUB213PDZ128mb,   TB_BCAST_SD },
-  { X86::VFNMSUB213PDZ256r,    X86::VFNMSUB213PDZ256mb,   TB_BCAST_SD },
-  { X86::VFNMSUB213PDZr,       X86::VFNMSUB213PDZmb,      TB_BCAST_SD },
-  { X86::VFNMSUB213PSZ128r,    X86::VFNMSUB213PSZ128mb,   TB_BCAST_SS },
-  { X86::VFNMSUB213PSZ256r,    X86::VFNMSUB213PSZ256mb,   TB_BCAST_SS },
-  { X86::VFNMSUB213PSZr,       X86::VFNMSUB213PSZmb,      TB_BCAST_SS },
-  { X86::VFNMSUB231PDZ128r,    X86::VFNMSUB231PDZ128mb,   TB_BCAST_SD },
-  { X86::VFNMSUB231PDZ256r,    X86::VFNMSUB231PDZ256mb,   TB_BCAST_SD },
-  { X86::VFNMSUB231PDZr,       X86::VFNMSUB231PDZmb,      TB_BCAST_SD },
-  { X86::VFNMSUB231PSZ128r,    X86::VFNMSUB231PSZ128mb,   TB_BCAST_SS },
-  { X86::VFNMSUB231PSZ256r,    X86::VFNMSUB231PSZ256mb,   TB_BCAST_SS },
-  { X86::VFNMSUB231PSZr,       X86::VFNMSUB231PSZmb,      TB_BCAST_SS },
-  { X86::VPTERNLOGDZ128rri,    X86::VPTERNLOGDZ128rmbi,   TB_BCAST_D },
-  { X86::VPTERNLOGDZ256rri,    X86::VPTERNLOGDZ256rmbi,   TB_BCAST_D },
-  { X86::VPTERNLOGDZrri,       X86::VPTERNLOGDZrmbi,      TB_BCAST_D },
-  { X86::VPTERNLOGQZ128rri,    X86::VPTERNLOGQZ128rmbi,   TB_BCAST_Q },
-  { X86::VPTERNLOGQZ256rri,    X86::VPTERNLOGQZ256rmbi,   TB_BCAST_Q },
-  { X86::VPTERNLOGQZrri,       X86::VPTERNLOGQZrmbi,      TB_BCAST_Q },
-};
-
 // Table to map instructions safe to broadcast using a different width from the
 // element width.
 static const X86FoldTableEntry BroadcastSizeTable2[] = {
@@ -397,8 +103,10 @@ lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) {
     CHECK_SORTED_UNIQUE(Table2)
     CHECK_SORTED_UNIQUE(Table3)
     CHECK_SORTED_UNIQUE(Table4)
+    CHECK_SORTED_UNIQUE(BroadcastTable1)
     CHECK_SORTED_UNIQUE(BroadcastTable2)
     CHECK_SORTED_UNIQUE(BroadcastTable3)
+    CHECK_SORTED_UNIQUE(BroadcastTable4)
     CHECK_SORTED_UNIQUE(BroadcastSizeTable2)
     CHECK_SORTED_UNIQUE(BroadcastSizeTable3)
     FoldTablesChecked.store(true, std::memory_order_relaxed);
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index adcf67e8c3cc538..63483abddde039f 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -76,6 +76,16 @@ class X86FoldTablesEmitter {
     bool NoForward = false;
     bool FoldLoad = false;
     bool FoldStore = false;
+    enum BcastType {
+      BCAST_NONE,
+      BCAST_D,
+      BCAST_Q,
+      BCAST_SS,
+      BCAST_SD,
+      BCAST_SH,
+    };
+    BcastType BroadcastKind = BCAST_NONE;
+
     Align Alignment;
 
     X86FoldTableEntry() = default;
@@ -99,6 +109,25 @@ class X86FoldTablesEmitter {
         Attrs += "TB_NO_FORWARD|";
       if (Alignment != Align(1))
         Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|";
+      switch (BroadcastKind) {
+      case BCAST_NONE:
+        break;
+      case BCAST_D:
+        Attrs += "TB_BCAST_D|";
+        break;
+      case BCAST_Q:
+        Attrs += "TB_BCAST_Q|";
+        break;
+      case BCAST_SS:
+        Attrs += "TB_BCAST_SS|";
+        break;
+      case BCAST_SD:
+        Attrs += "TB_BCAST_SD|";
+        break;
+      case BCAST_SH:
+        Attrs += "TB_BCAST_SH|";
+        break;
+      }
 
       StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
       if (SimplifiedAttrs.empty())
@@ -143,16 +172,24 @@ class X86FoldTablesEmitter {
   typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,
                    CompareInstrsByEnum>
       FoldTable;
-  // std::vector for each folding table.
-  // Table2Addr - Holds instructions which their memory form performs load+store
-  // Table#i - Holds instructions which the their memory form perform a load OR
-  //           a store,  and their #i'th operand is folded.
+  // Table2Addr - Holds instructions whose memory form performs a
+  //              load+store.
+  //
+  // Table#i - Holds instructions whose memory form performs a load OR a
+  //           store, and whose #i'th operand is folded.
+  //
+  // BroadcastTable#i - Holds instructions whose memory form performs a
+  //                    broadcast load, and whose #i'th operand is folded.
   FoldTable Table2Addr;
   FoldTable Table0;
   FoldTable Table1;
   FoldTable Table2;
   FoldTable Table3;
   FoldTable Table4;
+  FoldTable BroadcastTable1;
+  FoldTable BroadcastTable2;
+  FoldTable BroadcastTable3;
+  FoldTable BroadcastTable4;
 
 public:
   X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
@@ -165,13 +202,17 @@ class X86FoldTablesEmitter {
   // S sets the strategy of adding the TB_NO_REVERSE flag.
   void updateTables(const CodeGenInstruction *RegInst,
                     const CodeGenInstruction *MemInst, uint16_t S = 0,
-                    bool IsManual = false);
+                    bool IsManual = false, bool IsBroadcast = false);
 
   // Generates X86FoldTableEntry with the given instructions and fill it with
-  // the appropriate flags - then adds it to Table.
+  // the appropriate flags, then adds it to a memory fold table.
   void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst,
                          const CodeGenInstruction *MemInst, uint16_t S,
                          unsigned FoldedIdx, bool IsManual);
+  // Generates X86FoldTableEntry with the given instructions and adds it to a
+  // broadcast table.
+  void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst,
+                         const CodeGenInstruction *MemInst);
 
   // Print the given table as a static const C++ array of type
   // X86FoldTableEntry.
@@ -288,11 +329,12 @@ static bool isNOREXRegClass(const Record *Op) {
 class IsMatch {
   const CodeGenInstruction *MemInst;
   const X86Disassembler::RecognizableInstrBase MemRI;
+  bool IsBroadcast;
   const unsigned Variant;
 
 public:
-  IsMatch(const CodeGenInstruction *Inst, unsigned V)
-      : MemInst(Inst), MemRI(*MemInst), Variant(V) {}
+  IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V)
+      : MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {}
 
   bool operator()(const CodeGenInstruction *RegInst) {
     X86Disassembler::RecognizableInstrBase RegRI(*RegInst);
@@ -300,7 +342,11 @@ class IsMatch {
     const Record *MemRec = MemInst->TheDef;
 
     // EVEX_B means different things for memory and register forms.
-    if (RegRI.HasEVEX_B || MemRI.HasEVEX_B)
+    // register form: rounding control or SAE
+    // memory form: broadcast
+    if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B))
+      return false;
+    if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B))
       return false;
 
     if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form))
@@ -472,9 +518,65 @@ void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
   Table[RegInst] = Result;
 }
 
+// Adds a RegInst -> MemInst entry to the broadcast fold table Table. The
+// broadcast element kind is derived from the instruction names and, for the
+// integer kinds, from the register instruction's ExeDomain.
+void X86FoldTablesEmitter::addBroadcastEntry(
+    FoldTable &Table, const CodeGenInstruction *RegInst,
+    const CodeGenInstruction *MemInst) {
+  assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly");
+  X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
+  StringRef RegInstName = RegInst->TheDef->getName();
+  StringRef MemInstName = MemInst->TheDef->getName();
+  Record *Domain = RegInst->TheDef->getValueAsDef("ExeDomain");
+  bool IsSSEPackedInt = Domain->getName() == "SSEPackedInt";
+  // Pick the kind from a name tag; each assert checks MemInst has one too.
+  // TODO: Rename AVX512 instructions to simplify conditions, e.g.
+  //         D128 -> DZ128
+  //         D256 -> DZ256
+  //         VPERMI2Drr -> VPERMI2DZrr
+  //         VPERMI2Drmb -> VPERMI2DZrmb
+  if ((RegInstName.contains("DZ") || RegInstName.contains("DWZ") ||
+       RegInstName.contains("D128") || RegInstName.contains("D256") ||
+       RegInstName.contains("Dr") || RegInstName.contains("I32")) &&
+      IsSSEPackedInt) {
+    assert((MemInstName.contains("DZ") || MemInstName.contains("DWZ") ||
+            MemInstName.contains("D128") || MemInstName.contains("D256") ||
+            MemInstName.contains("Dr") || MemInstName.contains("I32")) &&
+           "Unmatched names for broadcast");
+    Result.BroadcastKind = X86FoldTableEntry::BCAST_D;
+  } else if ((RegInstName.contains("QZ") || RegInstName.contains("QBZ") ||
+              RegInstName.contains("Q128") || RegInstName.contains("Q256") ||
+              RegInstName.contains("Qr") || RegInstName.contains("I64")) &&
+             IsSSEPackedInt) {
+    assert((MemInstName.contains("QZ") || MemInstName.contains("QBZ") ||
+            MemInstName.contains("Q128") || MemInstName.contains("Q256") ||
+            MemInstName.contains("Qr") || MemInstName.contains("I64")) &&
+           "Unmatched names for broadcast");
+    Result.BroadcastKind = X86FoldTableEntry::BCAST_Q;
+  } else if (RegInstName.contains("PS") || RegInstName.contains("F32")) {
+    assert((MemInstName.contains("PS") || MemInstName.contains("F32")) &&
+           "Unmatched names for broadcast");
+    Result.BroadcastKind = X86FoldTableEntry::BCAST_SS;
+  } else if (RegInstName.contains("PD") || RegInstName.contains("F64")) {
+    assert((MemInstName.contains("PD") || MemInstName.contains("F64")) &&
+           "Unmatched names for broadcast");
+    Result.BroadcastKind = X86FoldTableEntry::BCAST_SD;
+  } else if (RegInstName.contains("PH")) {
+    assert(MemInstName.contains("PH") && "Unmatched names for broadcast");
+    Result.BroadcastKind = X86FoldTableEntry::BCAST_SH;
+  } else {
+    errs() << RegInstName << ", " << MemInstName << "\n";
+    llvm_unreachable("Name is not canoicalized for broadcast or "
+                     "ExeDomain is incorrect");
+  }
+  Table[RegInst] = Result;
+}
+
 void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
                                         const CodeGenInstruction *MemInst,
-                                        uint16_t S, bool IsManual) {
+                                        uint16_t S, bool IsManual,
+                                        bool IsBroadcast) {
 
   Record *RegRec = RegInst->TheDef;
   Record *MemRec = MemInst->TheDef;
@@ -485,6 +587,7 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
 
   // Instructions which Read-Modify-Write should be added to Table2Addr.
   if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) {
+    assert(!IsBroadcast && "Read-Modify-Write can not be broadcast");
     // X86 would not unfold Read-Modify-Write instructions so add TB_NO_REVERSE.
     addEntryWithFlags(Table2Addr, RegInst, MemInst, S | TB_NO_REVERSE, 0,
                       IsManual);
@@ -505,19 +608,28 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
           isMemoryOperand(MemOpRec)) {
         switch (I) {
         case 0:
+          assert(!IsBroadcast && "BroadcastTable0 needs to be added");
           addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual);
           return;
         case 1:
-          addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual);
+          IsBroadcast
+              ? addBroadcastEntry(BroadcastTable1, RegInst, MemInst)
+              : addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual);
           return;
         case 2:
-          addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual);
+          IsBroadcast
+              ? addBroadcastEntry(BroadcastTable2, RegInst, MemInst)
+              : addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual);
           return;
         case 3:
-          addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual);
+          IsBroadcast
+              ? addBroadcastEntry(BroadcastTable3, RegInst, MemInst)
+              : addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual);
           return;
         case 4:
-          addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual);
+          IsBroadcast
+              ? addBroadcastEntry(BroadcastTable4, RegInst, MemInst)
+              : addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual);
           return;
         }
       }
@@ -533,8 +645,10 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
     Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec;
     Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec;
     if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&
-        getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec))
+        getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec)) {
+      assert(!IsBroadcast && "Store can not be broadcast");
       addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual);
+    }
   }
 }
 
@@ -580,8 +694,19 @@ void X86FoldTablesEmitter::run(raw_ostream &O) {
     }
   }
 
+  // Create a copy b/c the register instruction will be removed when a new entry
+  // is added into memory fold tables.
+  auto RegInstsForBroadcast = RegInsts;
+
   Record *AsmWriter = Target.getAsmWriter();
   unsigned Variant = AsmWriter->getValueAsInt("Variant");
+  auto FixUp = [&](const CodeGenInstruction *RegInst) {
+    StringRef RegInstName = RegInst->TheDef->getName();
+    if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt"))
+      if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4)))
+        RegInst = &Target.getInstruction(RegAltRec);
+    return RegInst;
+  };
   // For each memory form instruction, try to find its register form
   // instruction.
   for (const CodeGenInstruction *MemInst : MemInsts) {
@@ -596,17 +721,30 @@ void X86FoldTablesEmitter::run(raw_ostream &O) {
     // opcode.
     std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;
 
-    auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Variant));
+    // Memory fold tables
+    auto Match =
+        find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant));
     if (Match != OpcRegInsts.end()) {
-      const CodeGenInstruction *RegInst = *Match;
-      StringRef RegInstName = RegInst->TheDef->getName();
-      if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt"))
-        if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4)))
-          RegInst = &Target.getInstruction(RegAltRec);
-
-      updateTables(RegInst, MemInst);
+      updateTables(FixUp(*Match), MemInst);
       OpcRegInsts.erase(Match);
     }
+
+    // Broadcast tables
+    StringRef MemInstName = MemInst->TheDef->getName();
+    if (!MemInstName.contains("mb") && !MemInstName.contains("mib"))
+      continue;
+    RegInstsIt = RegInstsForBroadcast.find(Opc);
+    assert(RegInstsIt != RegInstsForBroadcast.end() &&
+           "Unexpected control flow");
+    std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast =
+        RegInstsIt->second;
+    Match = find_if(OpcRegInstsForBroadcast,
+                    IsMatch(MemInst, /*IsBroadcast=*/true, Variant));
+    if (Match != OpcRegInstsForBroadcast.end()) {
+      updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false,
+                   /*IsBroadcast=*/true);
+      OpcRegInstsForBroadcast.erase(Match);
+    }
   }
 
   // Add the manually mapped instructions listed above.
@@ -631,6 +769,10 @@ void X86FoldTablesEmitter::run(raw_ostream &O) {
   CheckMemFoldTable(Table2);
   CheckMemFoldTable(Table3);
   CheckMemFoldTable(Table4);
+  CheckMemFoldTable(BroadcastTable1);
+  CheckMemFoldTable(BroadcastTable2);
+  CheckMemFoldTable(BroadcastTable3);
+  CheckMemFoldTable(BroadcastTable4);
 #endif
 #define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS);
   // Print all tables.
@@ -640,6 +782,10 @@ void X86FoldTablesEmitter::run(raw_ostream &O) {
   PRINT_TABLE(Table2)
   PRINT_TABLE(Table3)
   PRINT_TABLE(Table4)
+  PRINT_TABLE(BroadcastTable1)
+  PRINT_TABLE(BroadcastTable2)
+  PRINT_TABLE(BroadcastTable3)
+  PRINT_TABLE(BroadcastTable4)
 }
 
 static TableGen::Emitter::OptClass<X86FoldTablesEmitter>



More information about the llvm-commits mailing list