[llvm] [X86] Cleanup AVX512 VBROADCAST subvector instruction names. (PR #108888)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 16 14:24:34 PDT 2024


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/108888

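This patch makes the names of the VBROADCAST***X** subvector broadcast instructions (e.g. VBROADCASTF32X4, VBROADCASTI64X2) consistent. The ***X** section of the name represents the original subvector type/size, but the AVX512 Z/Z256/Z128 suffix was not being used consistently to represent the destination width: the 256-bit 64X2 forms were incorrectly named Z128 instead of Z256, and the 512-bit forms were missing the Z suffix entirely.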

>From e126afbeabe20de89c03a0af21c907ed6bfa2d78 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 16 Sep 2024 22:23:05 +0100
Subject: [PATCH] [X86] Cleanup AVX512 VBROADCAST subvector instruction names.

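This patch makes the names of the VBROADCAST***X** subvector broadcast instructions (e.g. VBROADCASTF32X4, VBROADCASTI64X2) consistent. The ***X** section of the name represents the original subvector type/size, but the AVX512 Z/Z256/Z128 suffix was not being used consistently to represent the destination width: the 256-bit 64X2 forms were incorrectly named Z128 instead of Z256, and the 512-bit forms were missing the Z suffix entirely.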
---
 .../X86/MCTargetDesc/X86InstComments.cpp      |  20 +--
 .../Target/X86/X86FixupVectorConstants.cpp    |   8 +-
 llvm/lib/Target/X86/X86InstrAVX512.td         | 124 +++++++++---------
 llvm/lib/Target/X86/X86InstrInfo.cpp          |   8 +-
 llvm/lib/Target/X86/X86MCInstLower.cpp        |  20 +--
 llvm/lib/Target/X86/X86SchedIceLake.td        |  20 +--
 llvm/lib/Target/X86/X86SchedSapphireRapids.td |  14 +-
 llvm/lib/Target/X86/X86SchedSkylakeServer.td  |  20 +--
 8 files changed, 117 insertions(+), 117 deletions(-)

diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 95038ccf63b8b8..a4b72515252a08 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -1249,18 +1249,18 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
 
   case X86::VBROADCASTF128rm:
   case X86::VBROADCASTI128rm:
-  CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm)
+  CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z256, rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z256, rm)
     DecodeSubVectorBroadcast(4, 2, ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
-  CASE_AVX512_INS_COMMON(BROADCASTF64X2, , rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI64X2, , rm)
+  CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z, rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z, rm)
     DecodeSubVectorBroadcast(8, 2, ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
-  CASE_AVX512_INS_COMMON(BROADCASTF64X4, , rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI64X4, , rm)
+  CASE_AVX512_INS_COMMON(BROADCASTF64X4, Z, rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI64X4, Z, rm)
     DecodeSubVectorBroadcast(8, 4, ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
@@ -1269,13 +1269,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     DecodeSubVectorBroadcast(8, 4, ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
-  CASE_AVX512_INS_COMMON(BROADCASTF32X4, , rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI32X4, , rm)
+  CASE_AVX512_INS_COMMON(BROADCASTF32X4, Z, rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI32X4, Z, rm)
     DecodeSubVectorBroadcast(16, 4, ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
-  CASE_AVX512_INS_COMMON(BROADCASTF32X8, , rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI32X8, , rm)
+  CASE_AVX512_INS_COMMON(BROADCASTF32X8, Z, rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI32X8, Z, rm)
     DecodeSubVectorBroadcast(16, 8, ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
index c9f79e1645f58b..68a4a0be3a1db7 100644
--- a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
+++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
@@ -439,8 +439,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
   case X86::VMOVUPSZrm:
     return FixupConstant({{X86::VBROADCASTSSZrm, 1, 32, rebuildSplatCst},
                           {X86::VBROADCASTSDZrm, 1, 64, rebuildSplatCst},
-                          {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst},
-                          {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}},
+                          {X86::VBROADCASTF32X4Zrm, 1, 128, rebuildSplatCst},
+                          {X86::VBROADCASTF64X4Zrm, 1, 256, rebuildSplatCst}},
                          512, 1);
     /* Integer Loads */
   case X86::MOVDQArm:
@@ -572,12 +572,12 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
         {X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst},
         {X86::VPMOVSXBQZrm, 8, 8, rebuildSExtCst},
         {X86::VPMOVZXBQZrm, 8, 8, rebuildZExtCst},
-        {X86::VBROADCASTI32X4rm, 1, 128, rebuildSplatCst},
+        {X86::VBROADCASTI32X4Zrm, 1, 128, rebuildSplatCst},
         {X86::VPMOVSXBDZrm, 16, 8, rebuildSExtCst},
         {X86::VPMOVZXBDZrm, 16, 8, rebuildZExtCst},
         {X86::VPMOVSXWQZrm, 8, 16, rebuildSExtCst},
         {X86::VPMOVZXWQZrm, 8, 16, rebuildZExtCst},
-        {X86::VBROADCASTI64X4rm, 1, 256, rebuildSplatCst},
+        {X86::VBROADCASTI64X4Zrm, 1, 256, rebuildSplatCst},
         {HasBWI ? X86::VPMOVSXBWZrm : 0, 32, 8, rebuildSExtCst},
         {HasBWI ? X86::VPMOVZXBWZrm : 0, 32, 8, rebuildZExtCst},
         {X86::VPMOVSXWDZrm, 16, 16, rebuildSExtCst},
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b6bf34a8a0d31c..41113d5d1a4e0c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1337,84 +1337,84 @@ let Predicates = [HasVLX, HasBWI] in {
 // AVX-512 BROADCAST SUBVECTORS
 //
 
-defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
-                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
-                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
-defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
-                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
-                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
-defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
-                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
-                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
-defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
-                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
-                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
+defm VBROADCASTI32X4Z : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
+                        X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
+                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTF32X4Z : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
+                        X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
+                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTI64X4Z : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
+                        X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
+                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
+defm VBROADCASTF64X4Z : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
+                        X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
+                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
 
 let Predicates = [HasAVX512] in {
 def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
-          (VBROADCASTF64X4rm addr:$src)>;
+          (VBROADCASTF64X4Zrm addr:$src)>;
 def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
-          (VBROADCASTF64X4rm addr:$src)>;
+          (VBROADCASTF64X4Zrm addr:$src)>;
 def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
-          (VBROADCASTF64X4rm addr:$src)>;
+          (VBROADCASTF64X4Zrm addr:$src)>;
 def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
-          (VBROADCASTI64X4rm addr:$src)>;
+          (VBROADCASTI64X4Zrm addr:$src)>;
 def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
-          (VBROADCASTI64X4rm addr:$src)>;
+          (VBROADCASTI64X4Zrm addr:$src)>;
 def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
-          (VBROADCASTI64X4rm addr:$src)>;
+          (VBROADCASTI64X4Zrm addr:$src)>;
 def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
-          (VBROADCASTI64X4rm addr:$src)>;
+          (VBROADCASTI64X4Zrm addr:$src)>;
 
 def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
-          (VBROADCASTF32X4rm addr:$src)>;
+          (VBROADCASTF32X4Zrm addr:$src)>;
 def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
-          (VBROADCASTF32X4rm addr:$src)>;
+          (VBROADCASTF32X4Zrm addr:$src)>;
 def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
-          (VBROADCASTF32X4rm addr:$src)>;
+          (VBROADCASTF32X4Zrm addr:$src)>;
 def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
-          (VBROADCASTI32X4rm addr:$src)>;
+          (VBROADCASTI32X4Zrm addr:$src)>;
 def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
-          (VBROADCASTI32X4rm addr:$src)>;
+          (VBROADCASTI32X4Zrm addr:$src)>;
 def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
-          (VBROADCASTI32X4rm addr:$src)>;
+          (VBROADCASTI32X4Zrm addr:$src)>;
 def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
-          (VBROADCASTI32X4rm addr:$src)>;
+          (VBROADCASTI32X4Zrm addr:$src)>;
 
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect_mask VK16WM:$mask,
                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                         (v16f32 immAllZerosV)),
-          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
+          (VBROADCASTF32X4Zrmkz VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK16WM:$mask,
                         (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                         VR512:$src0),
-          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+          (VBROADCASTF32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK16WM:$mask,
                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                         (v16i32 immAllZerosV)),
-          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
+          (VBROADCASTI32X4Zrmkz VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK16WM:$mask,
                         (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                         VR512:$src0),
-          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+          (VBROADCASTI32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
 
 def : Pat<(vselect_mask VK8WM:$mask,
                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                         (v8f64 immAllZerosV)),
-          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
+          (VBROADCASTF64X4Zrmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK8WM:$mask,
                         (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                         VR512:$src0),
-          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+          (VBROADCASTF64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK8WM:$mask,
                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                         (v8i64 immAllZerosV)),
-          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
+          (VBROADCASTI64X4Zrmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK8WM:$mask,
                         (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                         VR512:$src0),
-          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+          (VBROADCASTI64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
 }
 
 let Predicates = [HasVLX] in {
@@ -1461,9 +1461,9 @@ def : Pat<(vselect_mask VK8WM:$mask,
 
 let Predicates = [HasBF16] in {
   def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
-            (VBROADCASTF64X4rm addr:$src)>;
+            (VBROADCASTF64X4Zrm addr:$src)>;
   def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
-            (VBROADCASTF32X4rm addr:$src)>;
+            (VBROADCASTF32X4Zrm addr:$src)>;
 }
 
 let Predicates = [HasBF16, HasVLX] in
@@ -1471,10 +1471,10 @@ let Predicates = [HasBF16, HasVLX] in
             (VBROADCASTF32X4Z256rm addr:$src)>;
 
 let Predicates = [HasVLX, HasDQI] in {
-defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
+defm VBROADCASTI64X2Z256 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                            X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
                            EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
-defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
+defm VBROADCASTF64X2Z256 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                            X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
                            EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
 
@@ -1482,69 +1482,69 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2"
 def : Pat<(vselect_mask VK4WM:$mask,
                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                         (v4f64 immAllZerosV)),
-          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
+          (VBROADCASTF64X2Z256rmkz VK4WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK4WM:$mask,
                         (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                         VR256X:$src0),
-          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
+          (VBROADCASTF64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK4WM:$mask,
                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                         (v4i64 immAllZerosV)),
-          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
+          (VBROADCASTI64X2Z256rmkz VK4WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK4WM:$mask,
                         (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                         VR256X:$src0),
-          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
+          (VBROADCASTI64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
 }
 
 let Predicates = [HasDQI] in {
-defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
-                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
-                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
-defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
-                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
-                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
-defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
-                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
-                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
-defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
-                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
-                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
+defm VBROADCASTI64X2Z : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
+                        X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
+                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
+defm VBROADCASTI32X8Z : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
+                        X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
+                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
+defm VBROADCASTF64X2Z : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
+                        X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
+                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
+defm VBROADCASTF32X8Z : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
+                        X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
+                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
 
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect_mask VK16WM:$mask,
                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                         (v16f32 immAllZerosV)),
-          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
+          (VBROADCASTF32X8Zrmkz VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK16WM:$mask,
                         (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                         VR512:$src0),
-          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+          (VBROADCASTF32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK16WM:$mask,
                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                         (v16i32 immAllZerosV)),
-          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
+          (VBROADCASTI32X8Zrmkz VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK16WM:$mask,
                         (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                         VR512:$src0),
-          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+          (VBROADCASTI32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
 
 def : Pat<(vselect_mask VK8WM:$mask,
                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                         (v8f64 immAllZerosV)),
-          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
+          (VBROADCASTF64X2Zrmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK8WM:$mask,
                         (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                         VR512:$src0),
-          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+          (VBROADCASTF64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK8WM:$mask,
                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                         (v8i64 immAllZerosV)),
-          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
+          (VBROADCASTI64X2Zrmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect_mask VK8WM:$mask,
                         (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                         VR512:$src0),
-          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+          (VBROADCASTI64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
 }
 
 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a74da000af0cee..ad42a344a2d241 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6246,16 +6246,16 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   }
   case X86::VMOVAPSZ128rm_NOVLX:
     return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
-                           get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
+                           get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm);
   case X86::VMOVUPSZ128rm_NOVLX:
     return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
-                           get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
+                           get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm);
   case X86::VMOVAPSZ256rm_NOVLX:
     return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
-                           get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
+                           get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm);
   case X86::VMOVUPSZ256rm_NOVLX:
     return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
-                           get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
+                           get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm);
   case X86::VMOVAPSZ128mr_NOVLX:
     return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
                             get(X86::VEXTRACTF32x4Zmri), X86::sub_xmm);
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 77ddd2366e629e..55c237e2df2d2e 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2051,21 +2051,21 @@ static void addConstantComments(const MachineInstr *MI,
   case X86::VBROADCASTF128rm:
   case X86::VBROADCASTI128rm:
   MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm)
-  MASK_AVX512_CASE(X86::VBROADCASTF64X2Z128rm)
+  MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm)
   MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm)
-  MASK_AVX512_CASE(X86::VBROADCASTI64X2Z128rm)
+  MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm)
     printBroadcast(MI, OutStreamer, 2, 128);
     break;
-  MASK_AVX512_CASE(X86::VBROADCASTF32X4rm)
-  MASK_AVX512_CASE(X86::VBROADCASTF64X2rm)
-  MASK_AVX512_CASE(X86::VBROADCASTI32X4rm)
-  MASK_AVX512_CASE(X86::VBROADCASTI64X2rm)
+  MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm)
+  MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm)
+  MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm)
+  MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm)
     printBroadcast(MI, OutStreamer, 4, 128);
     break;
-  MASK_AVX512_CASE(X86::VBROADCASTF32X8rm)
-  MASK_AVX512_CASE(X86::VBROADCASTF64X4rm)
-  MASK_AVX512_CASE(X86::VBROADCASTI32X8rm)
-  MASK_AVX512_CASE(X86::VBROADCASTI64X4rm)
+  MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm)
+  MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm)
+  MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm)
+  MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm)
     printBroadcast(MI, OutStreamer, 2, 256);
     break;
 
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index b32db53ff08cde..f9a5d321c42fe4 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -1576,19 +1576,19 @@ def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd],
                                               "VBROADCASTF32X2Z256rm(b?)",
                                               "VBROADCASTF32X2Zrm(b?)",
                                               "VBROADCASTF32X4Z256rm(b?)",
-                                              "VBROADCASTF32X4rm(b?)",
-                                              "VBROADCASTF32X8rm(b?)",
-                                              "VBROADCASTF64X2Z128rm(b?)",
-                                              "VBROADCASTF64X2rm(b?)",
-                                              "VBROADCASTF64X4rm(b?)",
+                                              "VBROADCASTF32X4Zrm(b?)",
+                                              "VBROADCASTF32X8Zrm(b?)",
+                                              "VBROADCASTF64X2Z256rm(b?)",
+                                              "VBROADCASTF64X2Zrm(b?)",
+                                              "VBROADCASTF64X4Zrm(b?)",
                                               "VBROADCASTI32X2Z256rm(b?)",
                                               "VBROADCASTI32X2Zrm(b?)",
                                               "VBROADCASTI32X4Z256rm(b?)",
-                                              "VBROADCASTI32X4rm(b?)",
-                                              "VBROADCASTI32X8rm(b?)",
-                                              "VBROADCASTI64X2Z128rm(b?)",
-                                              "VBROADCASTI64X2rm(b?)",
-                                              "VBROADCASTI64X4rm(b?)",
+                                              "VBROADCASTI32X4Zrm(b?)",
+                                              "VBROADCASTI32X8Zrm(b?)",
+                                              "VBROADCASTI64X2Z256rm(b?)",
+                                              "VBROADCASTI64X2Zrm(b?)",
+                                              "VBROADCASTI64X4Zrm(b?)",
                                               "VBROADCASTSD(Z|Z256)rm(b?)",
                                               "VBROADCASTSS(Z|Z256)rm(b?)",
                                               "VINSERTF32x4(Z|Z256)rm(b?)",
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index 1fb3c7560a5724..8af0f7b57e642a 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -1601,9 +1601,9 @@ def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_11]> {
 def : InstRW<[SPRWriteResGroup126], (instregex "^MMX_MOV(D|Q)64rm$",
                                                "^VBROADCAST(F|I)128rm$",
                                                "^VBROADCAST(F|I)32X(2|4)Z256rm$",
-                                               "^VBROADCAST(F|I)32X(8|2Z)rm$",
-                                               "^VBROADCAST(F|I)(32|64)X4rm$",
-                                               "^VBROADCAST(F|I)64X2((Z128)?)rm$",
+                                               "^VBROADCAST(F|I)32X(8|2)Zrm$",
+                                               "^VBROADCAST(F|I)(32|64)X4Zrm$",
+                                               "^VBROADCAST(F|I)64X2(Z|Z256)rm$",
                                                "^VBROADCASTS(DY|SZ)rm$",
                                                "^VBROADCASTS(D|S)Z256rm$",
                                                "^VBROADCASTS(DZ|SY)rm$",
@@ -1652,9 +1652,9 @@ def SPRWriteResGroup131 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> {
   let Latency = 9;
   let NumMicroOps = 2;
 }
-def : InstRW<[SPRWriteResGroup131], (instregex "^VBROADCAST(F|I)32X(8|2Z)rmk(z?)$",
-                                               "^VBROADCAST(F|I)(32|64)X4rmk(z?)$",
-                                               "^VBROADCAST(F|I)64X2rmk(z?)$",
+def : InstRW<[SPRWriteResGroup131], (instregex "^VBROADCAST(F|I)32X(8|2)Zrmk(z?)$",
+                                               "^VBROADCAST(F|I)(32|64)X4Zrmk(z?)$",
+                                               "^VBROADCAST(F|I)64X2Zrmk(z?)$",
                                                "^VBROADCASTS(D|S)Zrmk(z?)$",
                                                "^VMOV(A|U)P(D|S)Zrmk(z?)$",
                                                "^VMOV(D|SH|SL)DUPZrmk(z?)$",
@@ -2698,7 +2698,7 @@ def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
   let NumMicroOps = 2;
 }
 def : InstRW<[SPRWriteResGroup262], (instregex "^VBROADCAST(F|I)32X(2|4)Z256rmk(z?)$",
-                                               "^VBROADCAST(F|I)64X2Z128rmk(z?)$",
+                                               "^VBROADCAST(F|I)64X2Z256rmk(z?)$",
                                                "^VBROADCASTS(D|S)Z256rmk(z?)$",
                                                "^VMOV(A|U)P(D|S)Z256rmk(z?)$",
                                                "^VMOV(D|SH|SL)DUPZ256rmk(z?)$",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 649d38de185a80..daea3cdc1664b2 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1547,19 +1547,19 @@ def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
                                               "VBROADCASTF32X2Z256rm(b?)",
                                               "VBROADCASTF32X2Zrm(b?)",
                                               "VBROADCASTF32X4Z256rm(b?)",
-                                              "VBROADCASTF32X4rm(b?)",
-                                              "VBROADCASTF32X8rm(b?)",
-                                              "VBROADCASTF64X2Z128rm(b?)",
-                                              "VBROADCASTF64X2rm(b?)",
-                                              "VBROADCASTF64X4rm(b?)",
+                                              "VBROADCASTF32X4Zrm(b?)",
+                                              "VBROADCASTF32X8Zrm(b?)",
+                                              "VBROADCASTF64X2Z256rm(b?)",
+                                              "VBROADCASTF64X2Zrm(b?)",
+                                              "VBROADCASTF64X4Zrm(b?)",
                                               "VBROADCASTI32X2Z256rm(b?)",
                                               "VBROADCASTI32X2Zrm(b?)",
                                               "VBROADCASTI32X4Z256rm(b?)",
-                                              "VBROADCASTI32X4rm(b?)",
-                                              "VBROADCASTI32X8rm(b?)",
-                                              "VBROADCASTI64X2Z128rm(b?)",
-                                              "VBROADCASTI64X2rm(b?)",
-                                              "VBROADCASTI64X4rm(b?)",
+                                              "VBROADCASTI32X4Zrm(b?)",
+                                              "VBROADCASTI32X8Zrm(b?)",
+                                              "VBROADCASTI64X2Z256rm(b?)",
+                                              "VBROADCASTI64X2Zrm(b?)",
+                                              "VBROADCASTI64X4Zrm(b?)",
                                               "VBROADCASTSD(Z|Z256)rm(b?)",
                                               "VBROADCASTSS(Z|Z256)rm(b?)",
                                               "VINSERTF32x4(Z|Z256)rm(b?)",



More information about the llvm-commits mailing list