[llvm] [X86][tablgen] Fix the broadcast tables (PR #79675)

Shengchen Kan via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 27 02:47:52 PST 2024


https://github.com/KanRobert updated https://github.com/llvm/llvm-project/pull/79675

>From 312eaaf51450e668399dd421dc402c68d20627b7 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Sat, 27 Jan 2024 12:59:17 +0800
Subject: [PATCH 1/2] [X86][tablgen] Fix the broadcast tables

---
 llvm/lib/Target/X86/X86InstrAVX512.td        |   2 +-
 llvm/test/TableGen/x86-fold-tables.inc       | 342 +++++++++----------
 llvm/utils/TableGen/X86FoldTablesEmitter.cpp |  54 +--
 3 files changed, 188 insertions(+), 210 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index fe7d90fbcdf7070..bb5e22c71427939 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12422,7 +12422,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
   let ExeDomain = VTI.ExeDomain in
   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
-                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
+                (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
                 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
                 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
                 (OpNode (VTI.VT VTI.RC:$src1),
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 029beeff0564438..dc400548ef2f0a7 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -6626,15 +6626,15 @@ static const X86FoldTableEntry Table4[] = {
 };
 
 static const X86FoldTableEntry BroadcastTable1[] = {
-  {X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rmb, TB_BCAST_SD},
-  {X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rmb, TB_BCAST_SD},
-  {X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrmb, TB_BCAST_SD},
-  {X86::VCVTDQ2PHZ128rr, X86::VCVTDQ2PHZ128rmb, TB_BCAST_SH},
-  {X86::VCVTDQ2PHZ256rr, X86::VCVTDQ2PHZ256rmb, TB_BCAST_SH},
-  {X86::VCVTDQ2PHZrr, X86::VCVTDQ2PHZrmb, TB_BCAST_SH},
-  {X86::VCVTDQ2PSZ128rr, X86::VCVTDQ2PSZ128rmb, TB_BCAST_SS},
-  {X86::VCVTDQ2PSZ256rr, X86::VCVTDQ2PSZ256rmb, TB_BCAST_SS},
-  {X86::VCVTDQ2PSZrr, X86::VCVTDQ2PSZrmb, TB_BCAST_SS},
+  {X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rmb, TB_BCAST_D},
+  {X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rmb, TB_BCAST_D},
+  {X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrmb, TB_BCAST_D},
+  {X86::VCVTDQ2PHZ128rr, X86::VCVTDQ2PHZ128rmb, TB_BCAST_D},
+  {X86::VCVTDQ2PHZ256rr, X86::VCVTDQ2PHZ256rmb, TB_BCAST_D},
+  {X86::VCVTDQ2PHZrr, X86::VCVTDQ2PHZrmb, TB_BCAST_D},
+  {X86::VCVTDQ2PSZ128rr, X86::VCVTDQ2PSZ128rmb, TB_BCAST_D},
+  {X86::VCVTDQ2PSZ256rr, X86::VCVTDQ2PSZ256rmb, TB_BCAST_D},
+  {X86::VCVTDQ2PSZrr, X86::VCVTDQ2PSZrmb, TB_BCAST_D},
   {X86::VCVTNEPS2BF16Z128rr, X86::VCVTNEPS2BF16Z128rmb, TB_BCAST_SS},
   {X86::VCVTNEPS2BF16Z256rr, X86::VCVTNEPS2BF16Z256rmb, TB_BCAST_SS},
   {X86::VCVTNEPS2BF16Zrr, X86::VCVTNEPS2BF16Zrmb, TB_BCAST_SS},
@@ -6644,9 +6644,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
   {X86::VCVTPD2PHZ128rr, X86::VCVTPD2PHZ128rmb, TB_BCAST_SD},
   {X86::VCVTPD2PHZ256rr, X86::VCVTPD2PHZ256rmb, TB_BCAST_SD},
   {X86::VCVTPD2PHZrr, X86::VCVTPD2PHZrmb, TB_BCAST_SD},
-  {X86::VCVTPD2PSZ128rr, X86::VCVTPD2PSZ128rmb, TB_BCAST_SS},
-  {X86::VCVTPD2PSZ256rr, X86::VCVTPD2PSZ256rmb, TB_BCAST_SS},
-  {X86::VCVTPD2PSZrr, X86::VCVTPD2PSZrmb, TB_BCAST_SS},
+  {X86::VCVTPD2PSZ128rr, X86::VCVTPD2PSZ128rmb, TB_BCAST_SD},
+  {X86::VCVTPD2PSZ256rr, X86::VCVTPD2PSZ256rmb, TB_BCAST_SD},
+  {X86::VCVTPD2PSZrr, X86::VCVTPD2PSZrmb, TB_BCAST_SD},
   {X86::VCVTPD2QQZ128rr, X86::VCVTPD2QQZ128rmb, TB_BCAST_SD},
   {X86::VCVTPD2QQZ256rr, X86::VCVTPD2QQZ256rmb, TB_BCAST_SD},
   {X86::VCVTPD2QQZrr, X86::VCVTPD2QQZrmb, TB_BCAST_SD},
@@ -6698,15 +6698,15 @@ static const X86FoldTableEntry BroadcastTable1[] = {
   {X86::VCVTPS2UQQZ128rr, X86::VCVTPS2UQQZ128rmb, TB_BCAST_SS},
   {X86::VCVTPS2UQQZ256rr, X86::VCVTPS2UQQZ256rmb, TB_BCAST_SS},
   {X86::VCVTPS2UQQZrr, X86::VCVTPS2UQQZrmb, TB_BCAST_SS},
-  {X86::VCVTQQ2PDZ128rr, X86::VCVTQQ2PDZ128rmb, TB_BCAST_SD},
-  {X86::VCVTQQ2PDZ256rr, X86::VCVTQQ2PDZ256rmb, TB_BCAST_SD},
-  {X86::VCVTQQ2PDZrr, X86::VCVTQQ2PDZrmb, TB_BCAST_SD},
-  {X86::VCVTQQ2PHZ128rr, X86::VCVTQQ2PHZ128rmb, TB_BCAST_SH},
-  {X86::VCVTQQ2PHZ256rr, X86::VCVTQQ2PHZ256rmb, TB_BCAST_SH},
-  {X86::VCVTQQ2PHZrr, X86::VCVTQQ2PHZrmb, TB_BCAST_SH},
-  {X86::VCVTQQ2PSZ128rr, X86::VCVTQQ2PSZ128rmb, TB_BCAST_SS},
-  {X86::VCVTQQ2PSZ256rr, X86::VCVTQQ2PSZ256rmb, TB_BCAST_SS},
-  {X86::VCVTQQ2PSZrr, X86::VCVTQQ2PSZrmb, TB_BCAST_SS},
+  {X86::VCVTQQ2PDZ128rr, X86::VCVTQQ2PDZ128rmb, TB_BCAST_Q},
+  {X86::VCVTQQ2PDZ256rr, X86::VCVTQQ2PDZ256rmb, TB_BCAST_Q},
+  {X86::VCVTQQ2PDZrr, X86::VCVTQQ2PDZrmb, TB_BCAST_Q},
+  {X86::VCVTQQ2PHZ128rr, X86::VCVTQQ2PHZ128rmb, TB_BCAST_Q},
+  {X86::VCVTQQ2PHZ256rr, X86::VCVTQQ2PHZ256rmb, TB_BCAST_Q},
+  {X86::VCVTQQ2PHZrr, X86::VCVTQQ2PHZrmb, TB_BCAST_Q},
+  {X86::VCVTQQ2PSZ128rr, X86::VCVTQQ2PSZ128rmb, TB_BCAST_Q},
+  {X86::VCVTQQ2PSZ256rr, X86::VCVTQQ2PSZ256rmb, TB_BCAST_Q},
+  {X86::VCVTQQ2PSZrr, X86::VCVTQQ2PSZrmb, TB_BCAST_Q},
   {X86::VCVTTPD2DQZ128rr, X86::VCVTTPD2DQZ128rmb, TB_BCAST_SD},
   {X86::VCVTTPD2DQZ256rr, X86::VCVTTPD2DQZ256rmb, TB_BCAST_SD},
   {X86::VCVTTPD2DQZrr, X86::VCVTTPD2DQZrmb, TB_BCAST_SD},
@@ -6749,24 +6749,24 @@ static const X86FoldTableEntry BroadcastTable1[] = {
   {X86::VCVTTPS2UQQZ128rr, X86::VCVTTPS2UQQZ128rmb, TB_BCAST_SS},
   {X86::VCVTTPS2UQQZ256rr, X86::VCVTTPS2UQQZ256rmb, TB_BCAST_SS},
   {X86::VCVTTPS2UQQZrr, X86::VCVTTPS2UQQZrmb, TB_BCAST_SS},
-  {X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rmb, TB_BCAST_SD},
-  {X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rmb, TB_BCAST_SD},
-  {X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrmb, TB_BCAST_SD},
-  {X86::VCVTUDQ2PHZ128rr, X86::VCVTUDQ2PHZ128rmb, TB_BCAST_SH},
-  {X86::VCVTUDQ2PHZ256rr, X86::VCVTUDQ2PHZ256rmb, TB_BCAST_SH},
-  {X86::VCVTUDQ2PHZrr, X86::VCVTUDQ2PHZrmb, TB_BCAST_SH},
-  {X86::VCVTUDQ2PSZ128rr, X86::VCVTUDQ2PSZ128rmb, TB_BCAST_SS},
-  {X86::VCVTUDQ2PSZ256rr, X86::VCVTUDQ2PSZ256rmb, TB_BCAST_SS},
-  {X86::VCVTUDQ2PSZrr, X86::VCVTUDQ2PSZrmb, TB_BCAST_SS},
-  {X86::VCVTUQQ2PDZ128rr, X86::VCVTUQQ2PDZ128rmb, TB_BCAST_SD},
-  {X86::VCVTUQQ2PDZ256rr, X86::VCVTUQQ2PDZ256rmb, TB_BCAST_SD},
-  {X86::VCVTUQQ2PDZrr, X86::VCVTUQQ2PDZrmb, TB_BCAST_SD},
-  {X86::VCVTUQQ2PHZ128rr, X86::VCVTUQQ2PHZ128rmb, TB_BCAST_SH},
-  {X86::VCVTUQQ2PHZ256rr, X86::VCVTUQQ2PHZ256rmb, TB_BCAST_SH},
-  {X86::VCVTUQQ2PHZrr, X86::VCVTUQQ2PHZrmb, TB_BCAST_SH},
-  {X86::VCVTUQQ2PSZ128rr, X86::VCVTUQQ2PSZ128rmb, TB_BCAST_SS},
-  {X86::VCVTUQQ2PSZ256rr, X86::VCVTUQQ2PSZ256rmb, TB_BCAST_SS},
-  {X86::VCVTUQQ2PSZrr, X86::VCVTUQQ2PSZrmb, TB_BCAST_SS},
+  {X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rmb, TB_BCAST_D},
+  {X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rmb, TB_BCAST_D},
+  {X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrmb, TB_BCAST_D},
+  {X86::VCVTUDQ2PHZ128rr, X86::VCVTUDQ2PHZ128rmb, TB_BCAST_D},
+  {X86::VCVTUDQ2PHZ256rr, X86::VCVTUDQ2PHZ256rmb, TB_BCAST_D},
+  {X86::VCVTUDQ2PHZrr, X86::VCVTUDQ2PHZrmb, TB_BCAST_D},
+  {X86::VCVTUDQ2PSZ128rr, X86::VCVTUDQ2PSZ128rmb, TB_BCAST_D},
+  {X86::VCVTUDQ2PSZ256rr, X86::VCVTUDQ2PSZ256rmb, TB_BCAST_D},
+  {X86::VCVTUDQ2PSZrr, X86::VCVTUDQ2PSZrmb, TB_BCAST_D},
+  {X86::VCVTUQQ2PDZ128rr, X86::VCVTUQQ2PDZ128rmb, TB_BCAST_Q},
+  {X86::VCVTUQQ2PDZ256rr, X86::VCVTUQQ2PDZ256rmb, TB_BCAST_Q},
+  {X86::VCVTUQQ2PDZrr, X86::VCVTUQQ2PDZrmb, TB_BCAST_Q},
+  {X86::VCVTUQQ2PHZ128rr, X86::VCVTUQQ2PHZ128rmb, TB_BCAST_Q},
+  {X86::VCVTUQQ2PHZ256rr, X86::VCVTUQQ2PHZ256rmb, TB_BCAST_Q},
+  {X86::VCVTUQQ2PHZrr, X86::VCVTUQQ2PHZrmb, TB_BCAST_Q},
+  {X86::VCVTUQQ2PSZ128rr, X86::VCVTUQQ2PSZ128rmb, TB_BCAST_Q},
+  {X86::VCVTUQQ2PSZ256rr, X86::VCVTUQQ2PSZ256rmb, TB_BCAST_Q},
+  {X86::VCVTUQQ2PSZrr, X86::VCVTUQQ2PSZrmb, TB_BCAST_Q},
   {X86::VCVTUW2PHZ128rr, X86::VCVTUW2PHZ128rmb, TB_BCAST_SH},
   {X86::VCVTUW2PHZ256rr, X86::VCVTUW2PHZ256rmb, TB_BCAST_SH},
   {X86::VCVTUW2PHZrr, X86::VCVTUW2PHZrmb, TB_BCAST_SH},
@@ -6963,15 +6963,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
   {X86::VCMPPSZ128rri, X86::VCMPPSZ128rmbi, TB_BCAST_SS},
   {X86::VCMPPSZ256rri, X86::VCMPPSZ256rmbi, TB_BCAST_SS},
   {X86::VCMPPSZrri, X86::VCMPPSZrmbi, TB_BCAST_SS},
-  {X86::VCVTDQ2PDZ128rrkz, X86::VCVTDQ2PDZ128rmbkz, TB_BCAST_SD},
-  {X86::VCVTDQ2PDZ256rrkz, X86::VCVTDQ2PDZ256rmbkz, TB_BCAST_SD},
-  {X86::VCVTDQ2PDZrrkz, X86::VCVTDQ2PDZrmbkz, TB_BCAST_SD},
-  {X86::VCVTDQ2PHZ128rrkz, X86::VCVTDQ2PHZ128rmbkz, TB_BCAST_SH},
-  {X86::VCVTDQ2PHZ256rrkz, X86::VCVTDQ2PHZ256rmbkz, TB_BCAST_SH},
-  {X86::VCVTDQ2PHZrrkz, X86::VCVTDQ2PHZrmbkz, TB_BCAST_SH},
-  {X86::VCVTDQ2PSZ128rrkz, X86::VCVTDQ2PSZ128rmbkz, TB_BCAST_SS},
-  {X86::VCVTDQ2PSZ256rrkz, X86::VCVTDQ2PSZ256rmbkz, TB_BCAST_SS},
-  {X86::VCVTDQ2PSZrrkz, X86::VCVTDQ2PSZrmbkz, TB_BCAST_SS},
+  {X86::VCVTDQ2PDZ128rrkz, X86::VCVTDQ2PDZ128rmbkz, TB_BCAST_D},
+  {X86::VCVTDQ2PDZ256rrkz, X86::VCVTDQ2PDZ256rmbkz, TB_BCAST_D},
+  {X86::VCVTDQ2PDZrrkz, X86::VCVTDQ2PDZrmbkz, TB_BCAST_D},
+  {X86::VCVTDQ2PHZ128rrkz, X86::VCVTDQ2PHZ128rmbkz, TB_BCAST_D},
+  {X86::VCVTDQ2PHZ256rrkz, X86::VCVTDQ2PHZ256rmbkz, TB_BCAST_D},
+  {X86::VCVTDQ2PHZrrkz, X86::VCVTDQ2PHZrmbkz, TB_BCAST_D},
+  {X86::VCVTDQ2PSZ128rrkz, X86::VCVTDQ2PSZ128rmbkz, TB_BCAST_D},
+  {X86::VCVTDQ2PSZ256rrkz, X86::VCVTDQ2PSZ256rmbkz, TB_BCAST_D},
+  {X86::VCVTDQ2PSZrrkz, X86::VCVTDQ2PSZrmbkz, TB_BCAST_D},
   {X86::VCVTNE2PS2BF16Z128rr, X86::VCVTNE2PS2BF16Z128rmb, TB_BCAST_SS},
   {X86::VCVTNE2PS2BF16Z256rr, X86::VCVTNE2PS2BF16Z256rmb, TB_BCAST_SS},
   {X86::VCVTNE2PS2BF16Zrr, X86::VCVTNE2PS2BF16Zrmb, TB_BCAST_SS},
@@ -6984,9 +6984,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
   {X86::VCVTPD2PHZ128rrkz, X86::VCVTPD2PHZ128rmbkz, TB_BCAST_SD},
   {X86::VCVTPD2PHZ256rrkz, X86::VCVTPD2PHZ256rmbkz, TB_BCAST_SD},
   {X86::VCVTPD2PHZrrkz, X86::VCVTPD2PHZrmbkz, TB_BCAST_SD},
-  {X86::VCVTPD2PSZ128rrkz, X86::VCVTPD2PSZ128rmbkz, TB_BCAST_SS},
-  {X86::VCVTPD2PSZ256rrkz, X86::VCVTPD2PSZ256rmbkz, TB_BCAST_SS},
-  {X86::VCVTPD2PSZrrkz, X86::VCVTPD2PSZrmbkz, TB_BCAST_SS},
+  {X86::VCVTPD2PSZ128rrkz, X86::VCVTPD2PSZ128rmbkz, TB_BCAST_SD},
+  {X86::VCVTPD2PSZ256rrkz, X86::VCVTPD2PSZ256rmbkz, TB_BCAST_SD},
+  {X86::VCVTPD2PSZrrkz, X86::VCVTPD2PSZrmbkz, TB_BCAST_SD},
   {X86::VCVTPD2QQZ128rrkz, X86::VCVTPD2QQZ128rmbkz, TB_BCAST_SD},
   {X86::VCVTPD2QQZ256rrkz, X86::VCVTPD2QQZ256rmbkz, TB_BCAST_SD},
   {X86::VCVTPD2QQZrrkz, X86::VCVTPD2QQZrmbkz, TB_BCAST_SD},
@@ -7038,15 +7038,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
   {X86::VCVTPS2UQQZ128rrkz, X86::VCVTPS2UQQZ128rmbkz, TB_BCAST_SS},
   {X86::VCVTPS2UQQZ256rrkz, X86::VCVTPS2UQQZ256rmbkz, TB_BCAST_SS},
   {X86::VCVTPS2UQQZrrkz, X86::VCVTPS2UQQZrmbkz, TB_BCAST_SS},
-  {X86::VCVTQQ2PDZ128rrkz, X86::VCVTQQ2PDZ128rmbkz, TB_BCAST_SD},
-  {X86::VCVTQQ2PDZ256rrkz, X86::VCVTQQ2PDZ256rmbkz, TB_BCAST_SD},
-  {X86::VCVTQQ2PDZrrkz, X86::VCVTQQ2PDZrmbkz, TB_BCAST_SD},
-  {X86::VCVTQQ2PHZ128rrkz, X86::VCVTQQ2PHZ128rmbkz, TB_BCAST_SH},
-  {X86::VCVTQQ2PHZ256rrkz, X86::VCVTQQ2PHZ256rmbkz, TB_BCAST_SH},
-  {X86::VCVTQQ2PHZrrkz, X86::VCVTQQ2PHZrmbkz, TB_BCAST_SH},
-  {X86::VCVTQQ2PSZ128rrkz, X86::VCVTQQ2PSZ128rmbkz, TB_BCAST_SS},
-  {X86::VCVTQQ2PSZ256rrkz, X86::VCVTQQ2PSZ256rmbkz, TB_BCAST_SS},
-  {X86::VCVTQQ2PSZrrkz, X86::VCVTQQ2PSZrmbkz, TB_BCAST_SS},
+  {X86::VCVTQQ2PDZ128rrkz, X86::VCVTQQ2PDZ128rmbkz, TB_BCAST_Q},
+  {X86::VCVTQQ2PDZ256rrkz, X86::VCVTQQ2PDZ256rmbkz, TB_BCAST_Q},
+  {X86::VCVTQQ2PDZrrkz, X86::VCVTQQ2PDZrmbkz, TB_BCAST_Q},
+  {X86::VCVTQQ2PHZ128rrkz, X86::VCVTQQ2PHZ128rmbkz, TB_BCAST_Q},
+  {X86::VCVTQQ2PHZ256rrkz, X86::VCVTQQ2PHZ256rmbkz, TB_BCAST_Q},
+  {X86::VCVTQQ2PHZrrkz, X86::VCVTQQ2PHZrmbkz, TB_BCAST_Q},
+  {X86::VCVTQQ2PSZ128rrkz, X86::VCVTQQ2PSZ128rmbkz, TB_BCAST_Q},
+  {X86::VCVTQQ2PSZ256rrkz, X86::VCVTQQ2PSZ256rmbkz, TB_BCAST_Q},
+  {X86::VCVTQQ2PSZrrkz, X86::VCVTQQ2PSZrmbkz, TB_BCAST_Q},
   {X86::VCVTTPD2DQZ128rrkz, X86::VCVTTPD2DQZ128rmbkz, TB_BCAST_SD},
   {X86::VCVTTPD2DQZ256rrkz, X86::VCVTTPD2DQZ256rmbkz, TB_BCAST_SD},
   {X86::VCVTTPD2DQZrrkz, X86::VCVTTPD2DQZrmbkz, TB_BCAST_SD},
@@ -7089,24 +7089,24 @@ static const X86FoldTableEntry BroadcastTable2[] = {
   {X86::VCVTTPS2UQQZ128rrkz, X86::VCVTTPS2UQQZ128rmbkz, TB_BCAST_SS},
   {X86::VCVTTPS2UQQZ256rrkz, X86::VCVTTPS2UQQZ256rmbkz, TB_BCAST_SS},
   {X86::VCVTTPS2UQQZrrkz, X86::VCVTTPS2UQQZrmbkz, TB_BCAST_SS},
-  {X86::VCVTUDQ2PDZ128rrkz, X86::VCVTUDQ2PDZ128rmbkz, TB_BCAST_SD},
-  {X86::VCVTUDQ2PDZ256rrkz, X86::VCVTUDQ2PDZ256rmbkz, TB_BCAST_SD},
-  {X86::VCVTUDQ2PDZrrkz, X86::VCVTUDQ2PDZrmbkz, TB_BCAST_SD},
-  {X86::VCVTUDQ2PHZ128rrkz, X86::VCVTUDQ2PHZ128rmbkz, TB_BCAST_SH},
-  {X86::VCVTUDQ2PHZ256rrkz, X86::VCVTUDQ2PHZ256rmbkz, TB_BCAST_SH},
-  {X86::VCVTUDQ2PHZrrkz, X86::VCVTUDQ2PHZrmbkz, TB_BCAST_SH},
-  {X86::VCVTUDQ2PSZ128rrkz, X86::VCVTUDQ2PSZ128rmbkz, TB_BCAST_SS},
-  {X86::VCVTUDQ2PSZ256rrkz, X86::VCVTUDQ2PSZ256rmbkz, TB_BCAST_SS},
-  {X86::VCVTUDQ2PSZrrkz, X86::VCVTUDQ2PSZrmbkz, TB_BCAST_SS},
-  {X86::VCVTUQQ2PDZ128rrkz, X86::VCVTUQQ2PDZ128rmbkz, TB_BCAST_SD},
-  {X86::VCVTUQQ2PDZ256rrkz, X86::VCVTUQQ2PDZ256rmbkz, TB_BCAST_SD},
-  {X86::VCVTUQQ2PDZrrkz, X86::VCVTUQQ2PDZrmbkz, TB_BCAST_SD},
-  {X86::VCVTUQQ2PHZ128rrkz, X86::VCVTUQQ2PHZ128rmbkz, TB_BCAST_SH},
-  {X86::VCVTUQQ2PHZ256rrkz, X86::VCVTUQQ2PHZ256rmbkz, TB_BCAST_SH},
-  {X86::VCVTUQQ2PHZrrkz, X86::VCVTUQQ2PHZrmbkz, TB_BCAST_SH},
-  {X86::VCVTUQQ2PSZ128rrkz, X86::VCVTUQQ2PSZ128rmbkz, TB_BCAST_SS},
-  {X86::VCVTUQQ2PSZ256rrkz, X86::VCVTUQQ2PSZ256rmbkz, TB_BCAST_SS},
-  {X86::VCVTUQQ2PSZrrkz, X86::VCVTUQQ2PSZrmbkz, TB_BCAST_SS},
+  {X86::VCVTUDQ2PDZ128rrkz, X86::VCVTUDQ2PDZ128rmbkz, TB_BCAST_D},
+  {X86::VCVTUDQ2PDZ256rrkz, X86::VCVTUDQ2PDZ256rmbkz, TB_BCAST_D},
+  {X86::VCVTUDQ2PDZrrkz, X86::VCVTUDQ2PDZrmbkz, TB_BCAST_D},
+  {X86::VCVTUDQ2PHZ128rrkz, X86::VCVTUDQ2PHZ128rmbkz, TB_BCAST_D},
+  {X86::VCVTUDQ2PHZ256rrkz, X86::VCVTUDQ2PHZ256rmbkz, TB_BCAST_D},
+  {X86::VCVTUDQ2PHZrrkz, X86::VCVTUDQ2PHZrmbkz, TB_BCAST_D},
+  {X86::VCVTUDQ2PSZ128rrkz, X86::VCVTUDQ2PSZ128rmbkz, TB_BCAST_D},
+  {X86::VCVTUDQ2PSZ256rrkz, X86::VCVTUDQ2PSZ256rmbkz, TB_BCAST_D},
+  {X86::VCVTUDQ2PSZrrkz, X86::VCVTUDQ2PSZrmbkz, TB_BCAST_D},
+  {X86::VCVTUQQ2PDZ128rrkz, X86::VCVTUQQ2PDZ128rmbkz, TB_BCAST_Q},
+  {X86::VCVTUQQ2PDZ256rrkz, X86::VCVTUQQ2PDZ256rmbkz, TB_BCAST_Q},
+  {X86::VCVTUQQ2PDZrrkz, X86::VCVTUQQ2PDZrmbkz, TB_BCAST_Q},
+  {X86::VCVTUQQ2PHZ128rrkz, X86::VCVTUQQ2PHZ128rmbkz, TB_BCAST_Q},
+  {X86::VCVTUQQ2PHZ256rrkz, X86::VCVTUQQ2PHZ256rmbkz, TB_BCAST_Q},
+  {X86::VCVTUQQ2PHZrrkz, X86::VCVTUQQ2PHZrmbkz, TB_BCAST_Q},
+  {X86::VCVTUQQ2PSZ128rrkz, X86::VCVTUQQ2PSZ128rmbkz, TB_BCAST_Q},
+  {X86::VCVTUQQ2PSZ256rrkz, X86::VCVTUQQ2PSZ256rmbkz, TB_BCAST_Q},
+  {X86::VCVTUQQ2PSZrrkz, X86::VCVTUQQ2PSZrmbkz, TB_BCAST_Q},
   {X86::VCVTUW2PHZ128rrkz, X86::VCVTUW2PHZ128rmbkz, TB_BCAST_SH},
   {X86::VCVTUW2PHZ256rrkz, X86::VCVTUW2PHZ256rmbkz, TB_BCAST_SH},
   {X86::VCVTUW2PHZrrkz, X86::VCVTUW2PHZrmbkz, TB_BCAST_SH},
@@ -7166,9 +7166,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
   {X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rmb, TB_BCAST_SD},
   {X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rmb, TB_BCAST_SD},
   {X86::VMAXCPDZrr, X86::VMAXCPDZrmb, TB_BCAST_SD},
-  {X86::VMAXCPHZ128rr, X86::VMAXCPHZ128rmb, TB_BCAST_SS},
-  {X86::VMAXCPHZ256rr, X86::VMAXCPHZ256rmb, TB_BCAST_SS},
-  {X86::VMAXCPHZrr, X86::VMAXCPHZrmb, TB_BCAST_SS},
+  {X86::VMAXCPHZ128rr, X86::VMAXCPHZ128rmb, TB_BCAST_SH},
+  {X86::VMAXCPHZ256rr, X86::VMAXCPHZ256rmb, TB_BCAST_SH},
+  {X86::VMAXCPHZrr, X86::VMAXCPHZrmb, TB_BCAST_SH},
   {X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS},
   {X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS},
   {X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS},
@@ -7184,9 +7184,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
   {X86::VMINCPDZ128rr, X86::VMINCPDZ128rmb, TB_BCAST_SD},
   {X86::VMINCPDZ256rr, X86::VMINCPDZ256rmb, TB_BCAST_SD},
   {X86::VMINCPDZrr, X86::VMINCPDZrmb, TB_BCAST_SD},
-  {X86::VMINCPHZ128rr, X86::VMINCPHZ128rmb, TB_BCAST_SS},
-  {X86::VMINCPHZ256rr, X86::VMINCPHZ256rmb, TB_BCAST_SS},
-  {X86::VMINCPHZrr, X86::VMINCPHZrmb, TB_BCAST_SS},
+  {X86::VMINCPHZ128rr, X86::VMINCPHZ128rmb, TB_BCAST_SH},
+  {X86::VMINCPHZ256rr, X86::VMINCPHZ256rmb, TB_BCAST_SH},
+  {X86::VMINCPHZrr, X86::VMINCPHZrmb, TB_BCAST_SH},
   {X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS},
   {X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS},
   {X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS},
@@ -7460,15 +7460,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
   {X86::VPTESTNMQZ128rr, X86::VPTESTNMQZ128rmb, TB_BCAST_Q},
   {X86::VPTESTNMQZ256rr, X86::VPTESTNMQZ256rmb, TB_BCAST_Q},
   {X86::VPTESTNMQZrr, X86::VPTESTNMQZrmb, TB_BCAST_Q},
-  {X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rmb, TB_BCAST_Q},
-  {X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rmb, TB_BCAST_Q},
-  {X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrmb, TB_BCAST_Q},
+  {X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rmb, TB_BCAST_D},
+  {X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rmb, TB_BCAST_D},
+  {X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrmb, TB_BCAST_D},
   {X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rmb, TB_BCAST_Q},
   {X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rmb, TB_BCAST_Q},
   {X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrmb, TB_BCAST_Q},
-  {X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rmb, TB_BCAST_Q},
-  {X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rmb, TB_BCAST_Q},
-  {X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrmb, TB_BCAST_Q},
+  {X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rmb, TB_BCAST_D},
+  {X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rmb, TB_BCAST_D},
+  {X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrmb, TB_BCAST_D},
   {X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rmb, TB_BCAST_Q},
   {X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rmb, TB_BCAST_Q},
   {X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrmb, TB_BCAST_Q},
@@ -7628,15 +7628,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmbik, TB_BCAST_SS},
   {X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmbik, TB_BCAST_SS},
   {X86::VCMPPSZrrik, X86::VCMPPSZrmbik, TB_BCAST_SS},
-  {X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmbk, TB_BCAST_SD},
-  {X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmbk, TB_BCAST_SD},
-  {X86::VCVTDQ2PDZrrk, X86::VCVTDQ2PDZrmbk, TB_BCAST_SD},
-  {X86::VCVTDQ2PHZ128rrk, X86::VCVTDQ2PHZ128rmbk, TB_BCAST_SH},
-  {X86::VCVTDQ2PHZ256rrk, X86::VCVTDQ2PHZ256rmbk, TB_BCAST_SH},
-  {X86::VCVTDQ2PHZrrk, X86::VCVTDQ2PHZrmbk, TB_BCAST_SH},
-  {X86::VCVTDQ2PSZ128rrk, X86::VCVTDQ2PSZ128rmbk, TB_BCAST_SS},
-  {X86::VCVTDQ2PSZ256rrk, X86::VCVTDQ2PSZ256rmbk, TB_BCAST_SS},
-  {X86::VCVTDQ2PSZrrk, X86::VCVTDQ2PSZrmbk, TB_BCAST_SS},
+  {X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmbk, TB_BCAST_D},
+  {X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmbk, TB_BCAST_D},
+  {X86::VCVTDQ2PDZrrk, X86::VCVTDQ2PDZrmbk, TB_BCAST_D},
+  {X86::VCVTDQ2PHZ128rrk, X86::VCVTDQ2PHZ128rmbk, TB_BCAST_D},
+  {X86::VCVTDQ2PHZ256rrk, X86::VCVTDQ2PHZ256rmbk, TB_BCAST_D},
+  {X86::VCVTDQ2PHZrrk, X86::VCVTDQ2PHZrmbk, TB_BCAST_D},
+  {X86::VCVTDQ2PSZ128rrk, X86::VCVTDQ2PSZ128rmbk, TB_BCAST_D},
+  {X86::VCVTDQ2PSZ256rrk, X86::VCVTDQ2PSZ256rmbk, TB_BCAST_D},
+  {X86::VCVTDQ2PSZrrk, X86::VCVTDQ2PSZrmbk, TB_BCAST_D},
   {X86::VCVTNE2PS2BF16Z128rrkz, X86::VCVTNE2PS2BF16Z128rmbkz, TB_BCAST_SS},
   {X86::VCVTNE2PS2BF16Z256rrkz, X86::VCVTNE2PS2BF16Z256rmbkz, TB_BCAST_SS},
   {X86::VCVTNE2PS2BF16Zrrkz, X86::VCVTNE2PS2BF16Zrmbkz, TB_BCAST_SS},
@@ -7649,9 +7649,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VCVTPD2PHZ128rrk, X86::VCVTPD2PHZ128rmbk, TB_BCAST_SD},
   {X86::VCVTPD2PHZ256rrk, X86::VCVTPD2PHZ256rmbk, TB_BCAST_SD},
   {X86::VCVTPD2PHZrrk, X86::VCVTPD2PHZrmbk, TB_BCAST_SD},
-  {X86::VCVTPD2PSZ128rrk, X86::VCVTPD2PSZ128rmbk, TB_BCAST_SS},
-  {X86::VCVTPD2PSZ256rrk, X86::VCVTPD2PSZ256rmbk, TB_BCAST_SS},
-  {X86::VCVTPD2PSZrrk, X86::VCVTPD2PSZrmbk, TB_BCAST_SS},
+  {X86::VCVTPD2PSZ128rrk, X86::VCVTPD2PSZ128rmbk, TB_BCAST_SD},
+  {X86::VCVTPD2PSZ256rrk, X86::VCVTPD2PSZ256rmbk, TB_BCAST_SD},
+  {X86::VCVTPD2PSZrrk, X86::VCVTPD2PSZrmbk, TB_BCAST_SD},
   {X86::VCVTPD2QQZ128rrk, X86::VCVTPD2QQZ128rmbk, TB_BCAST_SD},
   {X86::VCVTPD2QQZ256rrk, X86::VCVTPD2QQZ256rmbk, TB_BCAST_SD},
   {X86::VCVTPD2QQZrrk, X86::VCVTPD2QQZrmbk, TB_BCAST_SD},
@@ -7703,15 +7703,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VCVTPS2UQQZ128rrk, X86::VCVTPS2UQQZ128rmbk, TB_BCAST_SS},
   {X86::VCVTPS2UQQZ256rrk, X86::VCVTPS2UQQZ256rmbk, TB_BCAST_SS},
   {X86::VCVTPS2UQQZrrk, X86::VCVTPS2UQQZrmbk, TB_BCAST_SS},
-  {X86::VCVTQQ2PDZ128rrk, X86::VCVTQQ2PDZ128rmbk, TB_BCAST_SD},
-  {X86::VCVTQQ2PDZ256rrk, X86::VCVTQQ2PDZ256rmbk, TB_BCAST_SD},
-  {X86::VCVTQQ2PDZrrk, X86::VCVTQQ2PDZrmbk, TB_BCAST_SD},
-  {X86::VCVTQQ2PHZ128rrk, X86::VCVTQQ2PHZ128rmbk, TB_BCAST_SH},
-  {X86::VCVTQQ2PHZ256rrk, X86::VCVTQQ2PHZ256rmbk, TB_BCAST_SH},
-  {X86::VCVTQQ2PHZrrk, X86::VCVTQQ2PHZrmbk, TB_BCAST_SH},
-  {X86::VCVTQQ2PSZ128rrk, X86::VCVTQQ2PSZ128rmbk, TB_BCAST_SS},
-  {X86::VCVTQQ2PSZ256rrk, X86::VCVTQQ2PSZ256rmbk, TB_BCAST_SS},
-  {X86::VCVTQQ2PSZrrk, X86::VCVTQQ2PSZrmbk, TB_BCAST_SS},
+  {X86::VCVTQQ2PDZ128rrk, X86::VCVTQQ2PDZ128rmbk, TB_BCAST_Q},
+  {X86::VCVTQQ2PDZ256rrk, X86::VCVTQQ2PDZ256rmbk, TB_BCAST_Q},
+  {X86::VCVTQQ2PDZrrk, X86::VCVTQQ2PDZrmbk, TB_BCAST_Q},
+  {X86::VCVTQQ2PHZ128rrk, X86::VCVTQQ2PHZ128rmbk, TB_BCAST_Q},
+  {X86::VCVTQQ2PHZ256rrk, X86::VCVTQQ2PHZ256rmbk, TB_BCAST_Q},
+  {X86::VCVTQQ2PHZrrk, X86::VCVTQQ2PHZrmbk, TB_BCAST_Q},
+  {X86::VCVTQQ2PSZ128rrk, X86::VCVTQQ2PSZ128rmbk, TB_BCAST_Q},
+  {X86::VCVTQQ2PSZ256rrk, X86::VCVTQQ2PSZ256rmbk, TB_BCAST_Q},
+  {X86::VCVTQQ2PSZrrk, X86::VCVTQQ2PSZrmbk, TB_BCAST_Q},
   {X86::VCVTTPD2DQZ128rrk, X86::VCVTTPD2DQZ128rmbk, TB_BCAST_SD},
   {X86::VCVTTPD2DQZ256rrk, X86::VCVTTPD2DQZ256rmbk, TB_BCAST_SD},
   {X86::VCVTTPD2DQZrrk, X86::VCVTTPD2DQZrmbk, TB_BCAST_SD},
@@ -7754,24 +7754,24 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VCVTTPS2UQQZ128rrk, X86::VCVTTPS2UQQZ128rmbk, TB_BCAST_SS},
   {X86::VCVTTPS2UQQZ256rrk, X86::VCVTTPS2UQQZ256rmbk, TB_BCAST_SS},
   {X86::VCVTTPS2UQQZrrk, X86::VCVTTPS2UQQZrmbk, TB_BCAST_SS},
-  {X86::VCVTUDQ2PDZ128rrk, X86::VCVTUDQ2PDZ128rmbk, TB_BCAST_SD},
-  {X86::VCVTUDQ2PDZ256rrk, X86::VCVTUDQ2PDZ256rmbk, TB_BCAST_SD},
-  {X86::VCVTUDQ2PDZrrk, X86::VCVTUDQ2PDZrmbk, TB_BCAST_SD},
-  {X86::VCVTUDQ2PHZ128rrk, X86::VCVTUDQ2PHZ128rmbk, TB_BCAST_SH},
-  {X86::VCVTUDQ2PHZ256rrk, X86::VCVTUDQ2PHZ256rmbk, TB_BCAST_SH},
-  {X86::VCVTUDQ2PHZrrk, X86::VCVTUDQ2PHZrmbk, TB_BCAST_SH},
-  {X86::VCVTUDQ2PSZ128rrk, X86::VCVTUDQ2PSZ128rmbk, TB_BCAST_SS},
-  {X86::VCVTUDQ2PSZ256rrk, X86::VCVTUDQ2PSZ256rmbk, TB_BCAST_SS},
-  {X86::VCVTUDQ2PSZrrk, X86::VCVTUDQ2PSZrmbk, TB_BCAST_SS},
-  {X86::VCVTUQQ2PDZ128rrk, X86::VCVTUQQ2PDZ128rmbk, TB_BCAST_SD},
-  {X86::VCVTUQQ2PDZ256rrk, X86::VCVTUQQ2PDZ256rmbk, TB_BCAST_SD},
-  {X86::VCVTUQQ2PDZrrk, X86::VCVTUQQ2PDZrmbk, TB_BCAST_SD},
-  {X86::VCVTUQQ2PHZ128rrk, X86::VCVTUQQ2PHZ128rmbk, TB_BCAST_SH},
-  {X86::VCVTUQQ2PHZ256rrk, X86::VCVTUQQ2PHZ256rmbk, TB_BCAST_SH},
-  {X86::VCVTUQQ2PHZrrk, X86::VCVTUQQ2PHZrmbk, TB_BCAST_SH},
-  {X86::VCVTUQQ2PSZ128rrk, X86::VCVTUQQ2PSZ128rmbk, TB_BCAST_SS},
-  {X86::VCVTUQQ2PSZ256rrk, X86::VCVTUQQ2PSZ256rmbk, TB_BCAST_SS},
-  {X86::VCVTUQQ2PSZrrk, X86::VCVTUQQ2PSZrmbk, TB_BCAST_SS},
+  {X86::VCVTUDQ2PDZ128rrk, X86::VCVTUDQ2PDZ128rmbk, TB_BCAST_D},
+  {X86::VCVTUDQ2PDZ256rrk, X86::VCVTUDQ2PDZ256rmbk, TB_BCAST_D},
+  {X86::VCVTUDQ2PDZrrk, X86::VCVTUDQ2PDZrmbk, TB_BCAST_D},
+  {X86::VCVTUDQ2PHZ128rrk, X86::VCVTUDQ2PHZ128rmbk, TB_BCAST_D},
+  {X86::VCVTUDQ2PHZ256rrk, X86::VCVTUDQ2PHZ256rmbk, TB_BCAST_D},
+  {X86::VCVTUDQ2PHZrrk, X86::VCVTUDQ2PHZrmbk, TB_BCAST_D},
+  {X86::VCVTUDQ2PSZ128rrk, X86::VCVTUDQ2PSZ128rmbk, TB_BCAST_D},
+  {X86::VCVTUDQ2PSZ256rrk, X86::VCVTUDQ2PSZ256rmbk, TB_BCAST_D},
+  {X86::VCVTUDQ2PSZrrk, X86::VCVTUDQ2PSZrmbk, TB_BCAST_D},
+  {X86::VCVTUQQ2PDZ128rrk, X86::VCVTUQQ2PDZ128rmbk, TB_BCAST_Q},
+  {X86::VCVTUQQ2PDZ256rrk, X86::VCVTUQQ2PDZ256rmbk, TB_BCAST_Q},
+  {X86::VCVTUQQ2PDZrrk, X86::VCVTUQQ2PDZrmbk, TB_BCAST_Q},
+  {X86::VCVTUQQ2PHZ128rrk, X86::VCVTUQQ2PHZ128rmbk, TB_BCAST_Q},
+  {X86::VCVTUQQ2PHZ256rrk, X86::VCVTUQQ2PHZ256rmbk, TB_BCAST_Q},
+  {X86::VCVTUQQ2PHZrrk, X86::VCVTUQQ2PHZrmbk, TB_BCAST_Q},
+  {X86::VCVTUQQ2PSZ128rrk, X86::VCVTUQQ2PSZ128rmbk, TB_BCAST_Q},
+  {X86::VCVTUQQ2PSZ256rrk, X86::VCVTUQQ2PSZ256rmbk, TB_BCAST_Q},
+  {X86::VCVTUQQ2PSZrrk, X86::VCVTUQQ2PSZrmbk, TB_BCAST_Q},
   {X86::VCVTUW2PHZ128rrk, X86::VCVTUW2PHZ128rmbk, TB_BCAST_SH},
   {X86::VCVTUW2PHZ256rrk, X86::VCVTUW2PHZ256rmbk, TB_BCAST_SH},
   {X86::VCVTUW2PHZrrk, X86::VCVTUW2PHZrmbk, TB_BCAST_SH},
@@ -7999,9 +7999,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmbkz, TB_BCAST_SD},
   {X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmbkz, TB_BCAST_SD},
   {X86::VMAXCPDZrrkz, X86::VMAXCPDZrmbkz, TB_BCAST_SD},
-  {X86::VMAXCPHZ128rrkz, X86::VMAXCPHZ128rmbkz, TB_BCAST_SS},
-  {X86::VMAXCPHZ256rrkz, X86::VMAXCPHZ256rmbkz, TB_BCAST_SS},
-  {X86::VMAXCPHZrrkz, X86::VMAXCPHZrmbkz, TB_BCAST_SS},
+  {X86::VMAXCPHZ128rrkz, X86::VMAXCPHZ128rmbkz, TB_BCAST_SH},
+  {X86::VMAXCPHZ256rrkz, X86::VMAXCPHZ256rmbkz, TB_BCAST_SH},
+  {X86::VMAXCPHZrrkz, X86::VMAXCPHZrmbkz, TB_BCAST_SH},
   {X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmbkz, TB_BCAST_SS},
   {X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmbkz, TB_BCAST_SS},
   {X86::VMAXCPSZrrkz, X86::VMAXCPSZrmbkz, TB_BCAST_SS},
@@ -8017,9 +8017,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmbkz, TB_BCAST_SD},
   {X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmbkz, TB_BCAST_SD},
   {X86::VMINCPDZrrkz, X86::VMINCPDZrmbkz, TB_BCAST_SD},
-  {X86::VMINCPHZ128rrkz, X86::VMINCPHZ128rmbkz, TB_BCAST_SS},
-  {X86::VMINCPHZ256rrkz, X86::VMINCPHZ256rmbkz, TB_BCAST_SS},
-  {X86::VMINCPHZrrkz, X86::VMINCPHZrmbkz, TB_BCAST_SS},
+  {X86::VMINCPHZ128rrkz, X86::VMINCPHZ128rmbkz, TB_BCAST_SH},
+  {X86::VMINCPHZ256rrkz, X86::VMINCPHZ256rmbkz, TB_BCAST_SH},
+  {X86::VMINCPHZrrkz, X86::VMINCPHZrmbkz, TB_BCAST_SH},
   {X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmbkz, TB_BCAST_SS},
   {X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmbkz, TB_BCAST_SS},
   {X86::VMINCPSZrrkz, X86::VMINCPSZrmbkz, TB_BCAST_SS},
@@ -8113,15 +8113,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmbk, TB_BCAST_Q},
   {X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmbk, TB_BCAST_Q},
   {X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmbk, TB_BCAST_Q},
-  {X86::VPDPBUSDSZ128r, X86::VPDPBUSDSZ128mb, TB_BCAST_SD},
-  {X86::VPDPBUSDSZ256r, X86::VPDPBUSDSZ256mb, TB_BCAST_SD},
-  {X86::VPDPBUSDSZr, X86::VPDPBUSDSZmb, TB_BCAST_SD},
+  {X86::VPDPBUSDSZ128r, X86::VPDPBUSDSZ128mb, TB_BCAST_D},
+  {X86::VPDPBUSDSZ256r, X86::VPDPBUSDSZ256mb, TB_BCAST_D},
+  {X86::VPDPBUSDSZr, X86::VPDPBUSDSZmb, TB_BCAST_D},
   {X86::VPDPBUSDZ128r, X86::VPDPBUSDZ128mb, TB_BCAST_D},
   {X86::VPDPBUSDZ256r, X86::VPDPBUSDZ256mb, TB_BCAST_D},
   {X86::VPDPBUSDZr, X86::VPDPBUSDZmb, TB_BCAST_D},
-  {X86::VPDPWSSDSZ128r, X86::VPDPWSSDSZ128mb, TB_BCAST_SD},
-  {X86::VPDPWSSDSZ256r, X86::VPDPWSSDSZ256mb, TB_BCAST_SD},
-  {X86::VPDPWSSDSZr, X86::VPDPWSSDSZmb, TB_BCAST_SD},
+  {X86::VPDPWSSDSZ128r, X86::VPDPWSSDSZ128mb, TB_BCAST_D},
+  {X86::VPDPWSSDSZ256r, X86::VPDPWSSDSZ256mb, TB_BCAST_D},
+  {X86::VPDPWSSDSZr, X86::VPDPWSSDSZmb, TB_BCAST_D},
   {X86::VPDPWSSDZ128r, X86::VPDPWSSDZ128mb, TB_BCAST_D},
   {X86::VPDPWSSDZ256r, X86::VPDPWSSDZ256mb, TB_BCAST_D},
   {X86::VPDPWSSDZr, X86::VPDPWSSDZmb, TB_BCAST_D},
@@ -8347,15 +8347,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VPTESTNMQZ128rrk, X86::VPTESTNMQZ128rmbk, TB_BCAST_Q},
   {X86::VPTESTNMQZ256rrk, X86::VPTESTNMQZ256rmbk, TB_BCAST_Q},
   {X86::VPTESTNMQZrrk, X86::VPTESTNMQZrmbk, TB_BCAST_Q},
-  {X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmbkz, TB_BCAST_Q},
-  {X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmbkz, TB_BCAST_Q},
-  {X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmbkz, TB_BCAST_Q},
+  {X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmbkz, TB_BCAST_D},
+  {X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmbkz, TB_BCAST_D},
+  {X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmbkz, TB_BCAST_D},
   {X86::VPUNPCKHQDQZ128rrkz, X86::VPUNPCKHQDQZ128rmbkz, TB_BCAST_Q},
   {X86::VPUNPCKHQDQZ256rrkz, X86::VPUNPCKHQDQZ256rmbkz, TB_BCAST_Q},
   {X86::VPUNPCKHQDQZrrkz, X86::VPUNPCKHQDQZrmbkz, TB_BCAST_Q},
-  {X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmbkz, TB_BCAST_Q},
-  {X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmbkz, TB_BCAST_Q},
-  {X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmbkz, TB_BCAST_Q},
+  {X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmbkz, TB_BCAST_D},
+  {X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmbkz, TB_BCAST_D},
+  {X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmbkz, TB_BCAST_D},
   {X86::VPUNPCKLQDQZ128rrkz, X86::VPUNPCKLQDQZ128rmbkz, TB_BCAST_Q},
   {X86::VPUNPCKLQDQZ256rrkz, X86::VPUNPCKLQDQZ256rmbkz, TB_BCAST_Q},
   {X86::VPUNPCKLQDQZrrkz, X86::VPUNPCKLQDQZrmbkz, TB_BCAST_Q},
@@ -8881,9 +8881,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
   {X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmbk, TB_BCAST_SD},
   {X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmbk, TB_BCAST_SD},
   {X86::VMAXCPDZrrk, X86::VMAXCPDZrmbk, TB_BCAST_SD},
-  {X86::VMAXCPHZ128rrk, X86::VMAXCPHZ128rmbk, TB_BCAST_SS},
-  {X86::VMAXCPHZ256rrk, X86::VMAXCPHZ256rmbk, TB_BCAST_SS},
-  {X86::VMAXCPHZrrk, X86::VMAXCPHZrmbk, TB_BCAST_SS},
+  {X86::VMAXCPHZ128rrk, X86::VMAXCPHZ128rmbk, TB_BCAST_SH},
+  {X86::VMAXCPHZ256rrk, X86::VMAXCPHZ256rmbk, TB_BCAST_SH},
+  {X86::VMAXCPHZrrk, X86::VMAXCPHZrmbk, TB_BCAST_SH},
   {X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmbk, TB_BCAST_SS},
   {X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmbk, TB_BCAST_SS},
   {X86::VMAXCPSZrrk, X86::VMAXCPSZrmbk, TB_BCAST_SS},
@@ -8899,9 +8899,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
   {X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmbk, TB_BCAST_SD},
   {X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmbk, TB_BCAST_SD},
   {X86::VMINCPDZrrk, X86::VMINCPDZrmbk, TB_BCAST_SD},
-  {X86::VMINCPHZ128rrk, X86::VMINCPHZ128rmbk, TB_BCAST_SS},
-  {X86::VMINCPHZ256rrk, X86::VMINCPHZ256rmbk, TB_BCAST_SS},
-  {X86::VMINCPHZrrk, X86::VMINCPHZrmbk, TB_BCAST_SS},
+  {X86::VMINCPHZ128rrk, X86::VMINCPHZ128rmbk, TB_BCAST_SH},
+  {X86::VMINCPHZ256rrk, X86::VMINCPHZ256rmbk, TB_BCAST_SH},
+  {X86::VMINCPHZrrk, X86::VMINCPHZrmbk, TB_BCAST_SH},
   {X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmbk, TB_BCAST_SS},
   {X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmbk, TB_BCAST_SS},
   {X86::VMINCPSZrrk, X86::VMINCPSZrmbk, TB_BCAST_SS},
@@ -8953,24 +8953,24 @@ static const X86FoldTableEntry BroadcastTable4[] = {
   {X86::VPANDQZ128rrk, X86::VPANDQZ128rmbk, TB_BCAST_Q},
   {X86::VPANDQZ256rrk, X86::VPANDQZ256rmbk, TB_BCAST_Q},
   {X86::VPANDQZrrk, X86::VPANDQZrmbk, TB_BCAST_Q},
-  {X86::VPDPBUSDSZ128rk, X86::VPDPBUSDSZ128mbk, TB_BCAST_SD},
-  {X86::VPDPBUSDSZ128rkz, X86::VPDPBUSDSZ128mbkz, TB_BCAST_SD},
-  {X86::VPDPBUSDSZ256rk, X86::VPDPBUSDSZ256mbk, TB_BCAST_SD},
-  {X86::VPDPBUSDSZ256rkz, X86::VPDPBUSDSZ256mbkz, TB_BCAST_SD},
-  {X86::VPDPBUSDSZrk, X86::VPDPBUSDSZmbk, TB_BCAST_SD},
-  {X86::VPDPBUSDSZrkz, X86::VPDPBUSDSZmbkz, TB_BCAST_SD},
+  {X86::VPDPBUSDSZ128rk, X86::VPDPBUSDSZ128mbk, TB_BCAST_D},
+  {X86::VPDPBUSDSZ128rkz, X86::VPDPBUSDSZ128mbkz, TB_BCAST_D},
+  {X86::VPDPBUSDSZ256rk, X86::VPDPBUSDSZ256mbk, TB_BCAST_D},
+  {X86::VPDPBUSDSZ256rkz, X86::VPDPBUSDSZ256mbkz, TB_BCAST_D},
+  {X86::VPDPBUSDSZrk, X86::VPDPBUSDSZmbk, TB_BCAST_D},
+  {X86::VPDPBUSDSZrkz, X86::VPDPBUSDSZmbkz, TB_BCAST_D},
   {X86::VPDPBUSDZ128rk, X86::VPDPBUSDZ128mbk, TB_BCAST_D},
   {X86::VPDPBUSDZ128rkz, X86::VPDPBUSDZ128mbkz, TB_BCAST_D},
   {X86::VPDPBUSDZ256rk, X86::VPDPBUSDZ256mbk, TB_BCAST_D},
   {X86::VPDPBUSDZ256rkz, X86::VPDPBUSDZ256mbkz, TB_BCAST_D},
   {X86::VPDPBUSDZrk, X86::VPDPBUSDZmbk, TB_BCAST_D},
   {X86::VPDPBUSDZrkz, X86::VPDPBUSDZmbkz, TB_BCAST_D},
-  {X86::VPDPWSSDSZ128rk, X86::VPDPWSSDSZ128mbk, TB_BCAST_SD},
-  {X86::VPDPWSSDSZ128rkz, X86::VPDPWSSDSZ128mbkz, TB_BCAST_SD},
-  {X86::VPDPWSSDSZ256rk, X86::VPDPWSSDSZ256mbk, TB_BCAST_SD},
-  {X86::VPDPWSSDSZ256rkz, X86::VPDPWSSDSZ256mbkz, TB_BCAST_SD},
-  {X86::VPDPWSSDSZrk, X86::VPDPWSSDSZmbk, TB_BCAST_SD},
-  {X86::VPDPWSSDSZrkz, X86::VPDPWSSDSZmbkz, TB_BCAST_SD},
+  {X86::VPDPWSSDSZ128rk, X86::VPDPWSSDSZ128mbk, TB_BCAST_D},
+  {X86::VPDPWSSDSZ128rkz, X86::VPDPWSSDSZ128mbkz, TB_BCAST_D},
+  {X86::VPDPWSSDSZ256rk, X86::VPDPWSSDSZ256mbk, TB_BCAST_D},
+  {X86::VPDPWSSDSZ256rkz, X86::VPDPWSSDSZ256mbkz, TB_BCAST_D},
+  {X86::VPDPWSSDSZrk, X86::VPDPWSSDSZmbk, TB_BCAST_D},
+  {X86::VPDPWSSDSZrkz, X86::VPDPWSSDSZmbkz, TB_BCAST_D},
   {X86::VPDPWSSDZ128rk, X86::VPDPWSSDZ128mbk, TB_BCAST_D},
   {X86::VPDPWSSDZ128rkz, X86::VPDPWSSDZ128mbkz, TB_BCAST_D},
   {X86::VPDPWSSDZ256rk, X86::VPDPWSSDZ256mbk, TB_BCAST_D},
@@ -9180,15 +9180,15 @@ static const X86FoldTableEntry BroadcastTable4[] = {
   {X86::VPTERNLOGQZ256rrikz, X86::VPTERNLOGQZ256rmbikz, TB_BCAST_Q},
   {X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmbik, TB_BCAST_Q},
   {X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmbikz, TB_BCAST_Q},
-  {X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmbk, TB_BCAST_Q},
-  {X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmbk, TB_BCAST_Q},
-  {X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmbk, TB_BCAST_Q},
+  {X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmbk, TB_BCAST_D},
+  {X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmbk, TB_BCAST_D},
+  {X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmbk, TB_BCAST_D},
   {X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmbk, TB_BCAST_Q},
   {X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmbk, TB_BCAST_Q},
   {X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmbk, TB_BCAST_Q},
-  {X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmbk, TB_BCAST_Q},
-  {X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmbk, TB_BCAST_Q},
-  {X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmbk, TB_BCAST_Q},
+  {X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmbk, TB_BCAST_D},
+  {X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmbk, TB_BCAST_D},
+  {X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmbk, TB_BCAST_D},
   {X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmbk, TB_BCAST_Q},
   {X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmbk, TB_BCAST_Q},
   {X86::VPUNPCKLQDQZrrk, X86::VPUNPCKLQDQZrmbk, TB_BCAST_Q},
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 8a860d0945bb1a8..bf2beff28dd8bb2 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -14,6 +14,7 @@
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
 #include "X86RecognizableInstr.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/X86FoldTablesUtils.h"
 #include "llvm/TableGen/Record.h"
@@ -529,45 +530,22 @@ void X86FoldTablesEmitter::addBroadcastEntry(
   assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly");
   X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
 
-  Record *RegRec = RegInst->TheDef;
-  StringRef RegInstName = RegRec->getName();
-  StringRef MemInstName = MemInst->TheDef->getName();
-  Record *Domain = RegRec->getValueAsDef("ExeDomain");
-  bool IsSSEPackedInt = Domain->getName() == "SSEPackedInt";
-  if ((RegInstName.contains("DZ") || RegInstName.contains("DWZ") ||
-       RegInstName.contains("Dr") || RegInstName.contains("I32")) &&
-      IsSSEPackedInt) {
-    assert((MemInstName.contains("DZ") || RegInstName.contains("DWZ") ||
-            MemInstName.contains("Dr") || MemInstName.contains("I32")) &&
-           "Unmatched names for broadcast");
-    Result.BroadcastKind = X86FoldTableEntry::BCAST_D;
-  } else if ((RegInstName.contains("QZ") || RegInstName.contains("QBZ") ||
-              RegInstName.contains("Qr") || RegInstName.contains("I64")) &&
-             IsSSEPackedInt) {
-    assert((MemInstName.contains("QZ") || MemInstName.contains("QBZ") ||
-            MemInstName.contains("Qr") || MemInstName.contains("I64")) &&
-           "Unmatched names for broadcast");
-    Result.BroadcastKind = X86FoldTableEntry::BCAST_Q;
-  } else if ((RegInstName.contains("PS") || RegInstName.contains("F32") ||
-              RegInstName.contains("CPH")) &&
-             !RegInstName.contains("PH2PS")) {
-    assert((MemInstName.contains("PS") || MemInstName.contains("F32") ||
-            MemInstName.contains("CPH")) &&
-           "Unmatched names for broadcast");
-    Result.BroadcastKind = X86FoldTableEntry::BCAST_SS;
-  } else if ((RegInstName.contains("PD") || RegInstName.contains("F64")) &&
-             !RegInstName.contains("PH2PD")) {
-    assert((MemInstName.contains("PD") || MemInstName.contains("F64")) &&
-           "Unmatched names for broadcast");
-    Result.BroadcastKind = X86FoldTableEntry::BCAST_SD;
-  } else if (RegInstName.contains("PH")) {
-    assert(MemInstName.contains("PH") && "Unmatched names for broadcast");
-    Result.BroadcastKind = X86FoldTableEntry::BCAST_SH;
-  } else {
-    errs() << RegInstName << ", " << MemInstName << "\n";
-    llvm_unreachable("Name is not canoicalized for broadcast or "
-                     "ExeDomain is incorrect");
+  DagInit *In = MemInst->TheDef->getValueAsDag("InOperandList");
+  for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) {
+    Result.BroadcastKind =
+        StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString())
+            .Case("i32mem", X86FoldTableEntry::BCAST_D)
+            .Case("i64mem", X86FoldTableEntry::BCAST_Q)
+            .Case("i16mem", X86FoldTableEntry::BCAST_SH)
+            .Case("f16mem", X86FoldTableEntry::BCAST_SH)
+            .Case("f32mem", X86FoldTableEntry::BCAST_SS)
+            .Case("f64mem", X86FoldTableEntry::BCAST_SD)
+            .Default(X86FoldTableEntry::BCAST_NONE);
+    if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE)
+      break;
   }
+  assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE &&
+         "Unknown memory operand for broadcast");
 
   Table[RegInst] = Result;
 }

>From 95e42b7c36c6e83800644cf679f4c694c54dd928 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Sat, 27 Jan 2024 18:44:20 +0800
Subject: [PATCH 2/2] Add TB_BCAST_W

---
 .../include/llvm/Support/X86FoldTablesUtils.h | 11 +++---
 llvm/test/TableGen/x86-fold-tables.inc        | 36 +++++++++----------
 llvm/utils/TableGen/X86FoldTablesEmitter.cpp  |  6 +++-
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h
index ed244febc38d3a5..77d32cc7fb37ed3 100644
--- a/llvm/include/llvm/Support/X86FoldTablesUtils.h
+++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h
@@ -46,11 +46,12 @@ enum {
   // Broadcast type.
   // (stored in bits 12 - 14)
   TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3,
-  TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_SH = 4 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_W = 0 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_D = 1 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_Q = 2 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_SS = 3 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_SD = 4 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_SH = 5 << TB_BCAST_TYPE_SHIFT,
   TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT,
 
   // Unused bits 15-16
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index dc400548ef2f0a7..aa412fafb22ec67 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -6767,12 +6767,12 @@ static const X86FoldTableEntry BroadcastTable1[] = {
   {X86::VCVTUQQ2PSZ128rr, X86::VCVTUQQ2PSZ128rmb, TB_BCAST_Q},
   {X86::VCVTUQQ2PSZ256rr, X86::VCVTUQQ2PSZ256rmb, TB_BCAST_Q},
   {X86::VCVTUQQ2PSZrr, X86::VCVTUQQ2PSZrmb, TB_BCAST_Q},
-  {X86::VCVTUW2PHZ128rr, X86::VCVTUW2PHZ128rmb, TB_BCAST_SH},
-  {X86::VCVTUW2PHZ256rr, X86::VCVTUW2PHZ256rmb, TB_BCAST_SH},
-  {X86::VCVTUW2PHZrr, X86::VCVTUW2PHZrmb, TB_BCAST_SH},
-  {X86::VCVTW2PHZ128rr, X86::VCVTW2PHZ128rmb, TB_BCAST_SH},
-  {X86::VCVTW2PHZ256rr, X86::VCVTW2PHZ256rmb, TB_BCAST_SH},
-  {X86::VCVTW2PHZrr, X86::VCVTW2PHZrmb, TB_BCAST_SH},
+  {X86::VCVTUW2PHZ128rr, X86::VCVTUW2PHZ128rmb, TB_BCAST_W},
+  {X86::VCVTUW2PHZ256rr, X86::VCVTUW2PHZ256rmb, TB_BCAST_W},
+  {X86::VCVTUW2PHZrr, X86::VCVTUW2PHZrmb, TB_BCAST_W},
+  {X86::VCVTW2PHZ128rr, X86::VCVTW2PHZ128rmb, TB_BCAST_W},
+  {X86::VCVTW2PHZ256rr, X86::VCVTW2PHZ256rmb, TB_BCAST_W},
+  {X86::VCVTW2PHZrr, X86::VCVTW2PHZrmb, TB_BCAST_W},
   {X86::VEXP2PDZr, X86::VEXP2PDZmb, TB_BCAST_SD},
   {X86::VEXP2PSZr, X86::VEXP2PSZmb, TB_BCAST_SS},
   {X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rmb, TB_BCAST_SD},
@@ -7107,12 +7107,12 @@ static const X86FoldTableEntry BroadcastTable2[] = {
   {X86::VCVTUQQ2PSZ128rrkz, X86::VCVTUQQ2PSZ128rmbkz, TB_BCAST_Q},
   {X86::VCVTUQQ2PSZ256rrkz, X86::VCVTUQQ2PSZ256rmbkz, TB_BCAST_Q},
   {X86::VCVTUQQ2PSZrrkz, X86::VCVTUQQ2PSZrmbkz, TB_BCAST_Q},
-  {X86::VCVTUW2PHZ128rrkz, X86::VCVTUW2PHZ128rmbkz, TB_BCAST_SH},
-  {X86::VCVTUW2PHZ256rrkz, X86::VCVTUW2PHZ256rmbkz, TB_BCAST_SH},
-  {X86::VCVTUW2PHZrrkz, X86::VCVTUW2PHZrmbkz, TB_BCAST_SH},
-  {X86::VCVTW2PHZ128rrkz, X86::VCVTW2PHZ128rmbkz, TB_BCAST_SH},
-  {X86::VCVTW2PHZ256rrkz, X86::VCVTW2PHZ256rmbkz, TB_BCAST_SH},
-  {X86::VCVTW2PHZrrkz, X86::VCVTW2PHZrmbkz, TB_BCAST_SH},
+  {X86::VCVTUW2PHZ128rrkz, X86::VCVTUW2PHZ128rmbkz, TB_BCAST_W},
+  {X86::VCVTUW2PHZ256rrkz, X86::VCVTUW2PHZ256rmbkz, TB_BCAST_W},
+  {X86::VCVTUW2PHZrrkz, X86::VCVTUW2PHZrmbkz, TB_BCAST_W},
+  {X86::VCVTW2PHZ128rrkz, X86::VCVTW2PHZ128rmbkz, TB_BCAST_W},
+  {X86::VCVTW2PHZ256rrkz, X86::VCVTW2PHZ256rmbkz, TB_BCAST_W},
+  {X86::VCVTW2PHZrrkz, X86::VCVTW2PHZrmbkz, TB_BCAST_W},
   {X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD},
   {X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, TB_BCAST_SD},
   {X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD},
@@ -7772,12 +7772,12 @@ static const X86FoldTableEntry BroadcastTable3[] = {
   {X86::VCVTUQQ2PSZ128rrk, X86::VCVTUQQ2PSZ128rmbk, TB_BCAST_Q},
   {X86::VCVTUQQ2PSZ256rrk, X86::VCVTUQQ2PSZ256rmbk, TB_BCAST_Q},
   {X86::VCVTUQQ2PSZrrk, X86::VCVTUQQ2PSZrmbk, TB_BCAST_Q},
-  {X86::VCVTUW2PHZ128rrk, X86::VCVTUW2PHZ128rmbk, TB_BCAST_SH},
-  {X86::VCVTUW2PHZ256rrk, X86::VCVTUW2PHZ256rmbk, TB_BCAST_SH},
-  {X86::VCVTUW2PHZrrk, X86::VCVTUW2PHZrmbk, TB_BCAST_SH},
-  {X86::VCVTW2PHZ128rrk, X86::VCVTW2PHZ128rmbk, TB_BCAST_SH},
-  {X86::VCVTW2PHZ256rrk, X86::VCVTW2PHZ256rmbk, TB_BCAST_SH},
-  {X86::VCVTW2PHZrrk, X86::VCVTW2PHZrmbk, TB_BCAST_SH},
+  {X86::VCVTUW2PHZ128rrk, X86::VCVTUW2PHZ128rmbk, TB_BCAST_W},
+  {X86::VCVTUW2PHZ256rrk, X86::VCVTUW2PHZ256rmbk, TB_BCAST_W},
+  {X86::VCVTUW2PHZrrk, X86::VCVTUW2PHZrmbk, TB_BCAST_W},
+  {X86::VCVTW2PHZ128rrk, X86::VCVTW2PHZ128rmbk, TB_BCAST_W},
+  {X86::VCVTW2PHZ256rrk, X86::VCVTW2PHZ256rmbk, TB_BCAST_W},
+  {X86::VCVTW2PHZrrk, X86::VCVTW2PHZrmbk, TB_BCAST_W},
   {X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmbkz, TB_BCAST_SD},
   {X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmbkz, TB_BCAST_SD},
   {X86::VDIVPDZrrkz, X86::VDIVPDZrmbkz, TB_BCAST_SD},
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index bf2beff28dd8bb2..7ea02ecba324cbb 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -81,6 +81,7 @@ class X86FoldTablesEmitter {
     bool FoldStore = false;
     enum BcastType {
       BCAST_NONE,
+      BCAST_W,
       BCAST_D,
       BCAST_Q,
       BCAST_SS,
@@ -115,6 +116,9 @@ class X86FoldTablesEmitter {
       switch (BroadcastKind) {
       case BCAST_NONE:
         break;
+      case BCAST_W:
+        Attrs += "TB_BCAST_W|";
+        break;
       case BCAST_D:
         Attrs += "TB_BCAST_D|";
         break;
@@ -534,9 +538,9 @@ void X86FoldTablesEmitter::addBroadcastEntry(
   for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) {
     Result.BroadcastKind =
         StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString())
+            .Case("i16mem", X86FoldTableEntry::BCAST_W)
             .Case("i32mem", X86FoldTableEntry::BCAST_D)
             .Case("i64mem", X86FoldTableEntry::BCAST_Q)
-            .Case("i16mem", X86FoldTableEntry::BCAST_SH)
             .Case("f16mem", X86FoldTableEntry::BCAST_SH)
             .Case("f32mem", X86FoldTableEntry::BCAST_SS)
             .Case("f64mem", X86FoldTableEntry::BCAST_SD)



More information about the llvm-commits mailing list