[llvm] [X86] Cleanup AVX512 VBROADCAST subvector instruction names. (PR #108888)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 16 14:24:34 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/108888
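This patch makes the names of the VBROADCAST***X** subvector broadcast instructions consistent. The ***X** section represents the original subvector type/size, but the AVX512 Z/Z256/Z128 suffix was not being used consistently to represent the destination width: in some cases the wrong suffix was used (e.g. VBROADCASTF64X2Z128 for a 256-bit destination, now VBROADCASTF64X2Z256), and in others it was missing entirely (e.g. VBROADCASTF32X4 for a 512-bit destination, now VBROADCASTF32X4Z).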
From e126afbeabe20de89c03a0af21c907ed6bfa2d78 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 16 Sep 2024 22:23:05 +0100
Subject: [PATCH] [X86] Cleanup AVX512 VBROADCAST subvector instruction names.
This patch makes the names of the VBROADCAST***X** subvector broadcast instructions consistent. The ***X** section represents the original subvector size, but the AVX512 Z/Z256/Z128 suffix was not being used consistently to represent the destination width (in some cases the wrong suffix was used, in others it was missing entirely).
---
.../X86/MCTargetDesc/X86InstComments.cpp | 20 +--
.../Target/X86/X86FixupVectorConstants.cpp | 8 +-
llvm/lib/Target/X86/X86InstrAVX512.td | 124 +++++++++---------
llvm/lib/Target/X86/X86InstrInfo.cpp | 8 +-
llvm/lib/Target/X86/X86MCInstLower.cpp | 20 +--
llvm/lib/Target/X86/X86SchedIceLake.td | 20 +--
llvm/lib/Target/X86/X86SchedSapphireRapids.td | 14 +-
llvm/lib/Target/X86/X86SchedSkylakeServer.td | 20 +--
8 files changed, 117 insertions(+), 117 deletions(-)
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 95038ccf63b8b8..a4b72515252a08 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -1249,18 +1249,18 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VBROADCASTF128rm:
case X86::VBROADCASTI128rm:
- CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm)
- CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z256, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z256, rm)
DecodeSubVectorBroadcast(4, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF64X2, , rm)
- CASE_AVX512_INS_COMMON(BROADCASTI64X2, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z, rm)
DecodeSubVectorBroadcast(8, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF64X4, , rm)
- CASE_AVX512_INS_COMMON(BROADCASTI64X4, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF64X4, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI64X4, Z, rm)
DecodeSubVectorBroadcast(8, 4, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1269,13 +1269,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeSubVectorBroadcast(8, 4, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF32X4, , rm)
- CASE_AVX512_INS_COMMON(BROADCASTI32X4, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X4, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X4, Z, rm)
DecodeSubVectorBroadcast(16, 4, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF32X8, , rm)
- CASE_AVX512_INS_COMMON(BROADCASTI32X8, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X8, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X8, Z, rm)
DecodeSubVectorBroadcast(16, 8, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
index c9f79e1645f58b..68a4a0be3a1db7 100644
--- a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
+++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
@@ -439,8 +439,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
case X86::VMOVUPSZrm:
return FixupConstant({{X86::VBROADCASTSSZrm, 1, 32, rebuildSplatCst},
{X86::VBROADCASTSDZrm, 1, 64, rebuildSplatCst},
- {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst},
- {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}},
+ {X86::VBROADCASTF32X4Zrm, 1, 128, rebuildSplatCst},
+ {X86::VBROADCASTF64X4Zrm, 1, 256, rebuildSplatCst}},
512, 1);
/* Integer Loads */
case X86::MOVDQArm:
@@ -572,12 +572,12 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
{X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst},
{X86::VPMOVSXBQZrm, 8, 8, rebuildSExtCst},
{X86::VPMOVZXBQZrm, 8, 8, rebuildZExtCst},
- {X86::VBROADCASTI32X4rm, 1, 128, rebuildSplatCst},
+ {X86::VBROADCASTI32X4Zrm, 1, 128, rebuildSplatCst},
{X86::VPMOVSXBDZrm, 16, 8, rebuildSExtCst},
{X86::VPMOVZXBDZrm, 16, 8, rebuildZExtCst},
{X86::VPMOVSXWQZrm, 8, 16, rebuildSExtCst},
{X86::VPMOVZXWQZrm, 8, 16, rebuildZExtCst},
- {X86::VBROADCASTI64X4rm, 1, 256, rebuildSplatCst},
+ {X86::VBROADCASTI64X4Zrm, 1, 256, rebuildSplatCst},
{HasBWI ? X86::VPMOVSXBWZrm : 0, 32, 8, rebuildSExtCst},
{HasBWI ? X86::VPMOVZXBWZrm : 0, 32, 8, rebuildZExtCst},
{X86::VPMOVSXWDZrm, 16, 16, rebuildSExtCst},
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b6bf34a8a0d31c..41113d5d1a4e0c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1337,84 +1337,84 @@ let Predicates = [HasVLX, HasBWI] in {
// AVX-512 BROADCAST SUBVECTORS
//
-defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
- X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
- EVEX_V512, EVEX_CD8<32, CD8VT4>;
-defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
- X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
- EVEX_V512, EVEX_CD8<32, CD8VT4>;
-defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
- X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
- EVEX_V512, EVEX_CD8<64, CD8VT4>;
-defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
- X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
- EVEX_V512, EVEX_CD8<64, CD8VT4>;
+defm VBROADCASTI32X4Z : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
+ X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTF32X4Z : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
+ X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTI64X4Z : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
+ X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT4>;
+defm VBROADCASTF64X4Z : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
+ X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTF64X4rm addr:$src)>;
+ (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTF64X4rm addr:$src)>;
+ (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTF64X4rm addr:$src)>;
+ (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTI64X4rm addr:$src)>;
+ (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTI64X4rm addr:$src)>;
+ (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTI64X4rm addr:$src)>;
+ (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTI64X4rm addr:$src)>;
+ (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF32X4rm addr:$src)>;
+ (VBROADCASTF32X4Zrm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF32X4rm addr:$src)>;
+ (VBROADCASTF32X4Zrm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF32X4rm addr:$src)>;
+ (VBROADCASTF32X4Zrm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTI32X4rm addr:$src)>;
+ (VBROADCASTI32X4Zrm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTI32X4rm addr:$src)>;
+ (VBROADCASTI32X4Zrm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTI32X4rm addr:$src)>;
+ (VBROADCASTI32X4Zrm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTI32X4rm addr:$src)>;
+ (VBROADCASTI32X4Zrm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
(v16f32 immAllZerosV)),
- (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
+ (VBROADCASTF32X4Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
- (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+ (VBROADCASTF32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
(v16i32 immAllZerosV)),
- (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
+ (VBROADCASTI32X4Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
- (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+ (VBROADCASTI32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
(v8f64 immAllZerosV)),
- (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
+ (VBROADCASTF64X4Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
- (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+ (VBROADCASTF64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
(v8i64 immAllZerosV)),
- (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
+ (VBROADCASTI64X4Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
- (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+ (VBROADCASTI64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
@@ -1461,9 +1461,9 @@ def : Pat<(vselect_mask VK8WM:$mask,
let Predicates = [HasBF16] in {
def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTF64X4rm addr:$src)>;
+ (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF32X4rm addr:$src)>;
+ (VBROADCASTF32X4Zrm addr:$src)>;
}
let Predicates = [HasBF16, HasVLX] in
@@ -1471,10 +1471,10 @@ let Predicates = [HasBF16, HasVLX] in
(VBROADCASTF32X4Z256rm addr:$src)>;
let Predicates = [HasVLX, HasDQI] in {
-defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
+defm VBROADCASTI64X2Z256 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
-defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
+defm VBROADCASTF64X2Z256 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
@@ -1482,69 +1482,69 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2"
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
(v4f64 immAllZerosV)),
- (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
+ (VBROADCASTF64X2Z256rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
- (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
+ (VBROADCASTF64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
(v4i64 immAllZerosV)),
- (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
+ (VBROADCASTI64X2Z256rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
- (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
+ (VBROADCASTI64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI] in {
-defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
- X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
- EVEX_V512, EVEX_CD8<64, CD8VT2>;
-defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
- X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
- EVEX_V512, EVEX_CD8<32, CD8VT8>;
-defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
- X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
- EVEX_V512, EVEX_CD8<64, CD8VT2>;
-defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
- X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
- EVEX_V512, EVEX_CD8<32, CD8VT8>;
+defm VBROADCASTI64X2Z : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
+ X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT2>;
+defm VBROADCASTI32X8Z : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
+ X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT8>;
+defm VBROADCASTF64X2Z : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
+ X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT2>;
+defm VBROADCASTF32X8Z : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
+ X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
(v16f32 immAllZerosV)),
- (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
+ (VBROADCASTF32X8Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
- (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+ (VBROADCASTF32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
(v16i32 immAllZerosV)),
- (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
+ (VBROADCASTI32X8Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
- (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+ (VBROADCASTI32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
(v8f64 immAllZerosV)),
- (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
+ (VBROADCASTF64X2Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
- (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+ (VBROADCASTF64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
(v8i64 immAllZerosV)),
- (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
+ (VBROADCASTI64X2Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
- (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+ (VBROADCASTI64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a74da000af0cee..ad42a344a2d241 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6246,16 +6246,16 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case X86::VMOVAPSZ128rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
- get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
+ get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm);
case X86::VMOVUPSZ128rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
- get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
+ get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm);
case X86::VMOVAPSZ256rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
- get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
+ get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm);
case X86::VMOVUPSZ256rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
- get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
+ get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm);
case X86::VMOVAPSZ128mr_NOVLX:
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
get(X86::VEXTRACTF32x4Zmri), X86::sub_xmm);
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 77ddd2366e629e..55c237e2df2d2e 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2051,21 +2051,21 @@ static void addConstantComments(const MachineInstr *MI,
case X86::VBROADCASTF128rm:
case X86::VBROADCASTI128rm:
MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm)
- MASK_AVX512_CASE(X86::VBROADCASTF64X2Z128rm)
+ MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm)
MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm)
- MASK_AVX512_CASE(X86::VBROADCASTI64X2Z128rm)
+ MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm)
printBroadcast(MI, OutStreamer, 2, 128);
break;
- MASK_AVX512_CASE(X86::VBROADCASTF32X4rm)
- MASK_AVX512_CASE(X86::VBROADCASTF64X2rm)
- MASK_AVX512_CASE(X86::VBROADCASTI32X4rm)
- MASK_AVX512_CASE(X86::VBROADCASTI64X2rm)
+ MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm)
printBroadcast(MI, OutStreamer, 4, 128);
break;
- MASK_AVX512_CASE(X86::VBROADCASTF32X8rm)
- MASK_AVX512_CASE(X86::VBROADCASTF64X4rm)
- MASK_AVX512_CASE(X86::VBROADCASTI32X8rm)
- MASK_AVX512_CASE(X86::VBROADCASTI64X4rm)
+ MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm)
printBroadcast(MI, OutStreamer, 2, 256);
break;
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index b32db53ff08cde..f9a5d321c42fe4 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -1576,19 +1576,19 @@ def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd],
"VBROADCASTF32X2Z256rm(b?)",
"VBROADCASTF32X2Zrm(b?)",
"VBROADCASTF32X4Z256rm(b?)",
- "VBROADCASTF32X4rm(b?)",
- "VBROADCASTF32X8rm(b?)",
- "VBROADCASTF64X2Z128rm(b?)",
- "VBROADCASTF64X2rm(b?)",
- "VBROADCASTF64X4rm(b?)",
+ "VBROADCASTF32X4Zrm(b?)",
+ "VBROADCASTF32X8Zrm(b?)",
+ "VBROADCASTF64X2Z256rm(b?)",
+ "VBROADCASTF64X2Zrm(b?)",
+ "VBROADCASTF64X4Zrm(b?)",
"VBROADCASTI32X2Z256rm(b?)",
"VBROADCASTI32X2Zrm(b?)",
"VBROADCASTI32X4Z256rm(b?)",
- "VBROADCASTI32X4rm(b?)",
- "VBROADCASTI32X8rm(b?)",
- "VBROADCASTI64X2Z128rm(b?)",
- "VBROADCASTI64X2rm(b?)",
- "VBROADCASTI64X4rm(b?)",
+ "VBROADCASTI32X4Zrm(b?)",
+ "VBROADCASTI32X8Zrm(b?)",
+ "VBROADCASTI64X2Z256rm(b?)",
+ "VBROADCASTI64X2Zrm(b?)",
+ "VBROADCASTI64X4Zrm(b?)",
"VBROADCASTSD(Z|Z256)rm(b?)",
"VBROADCASTSS(Z|Z256)rm(b?)",
"VINSERTF32x4(Z|Z256)rm(b?)",
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index 1fb3c7560a5724..8af0f7b57e642a 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -1601,9 +1601,9 @@ def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_11]> {
def : InstRW<[SPRWriteResGroup126], (instregex "^MMX_MOV(D|Q)64rm$",
"^VBROADCAST(F|I)128rm$",
"^VBROADCAST(F|I)32X(2|4)Z256rm$",
- "^VBROADCAST(F|I)32X(8|2Z)rm$",
- "^VBROADCAST(F|I)(32|64)X4rm$",
- "^VBROADCAST(F|I)64X2((Z128)?)rm$",
+ "^VBROADCAST(F|I)32X(8|2)Zrm$",
+ "^VBROADCAST(F|I)(32|64)X4Zrm$",
+ "^VBROADCAST(F|I)64X2(Z|Z256)rm$",
"^VBROADCASTS(DY|SZ)rm$",
"^VBROADCASTS(D|S)Z256rm$",
"^VBROADCASTS(DZ|SY)rm$",
@@ -1652,9 +1652,9 @@ def SPRWriteResGroup131 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> {
let Latency = 9;
let NumMicroOps = 2;
}
-def : InstRW<[SPRWriteResGroup131], (instregex "^VBROADCAST(F|I)32X(8|2Z)rmk(z?)$",
- "^VBROADCAST(F|I)(32|64)X4rmk(z?)$",
- "^VBROADCAST(F|I)64X2rmk(z?)$",
+def : InstRW<[SPRWriteResGroup131], (instregex "^VBROADCAST(F|I)32X(8|2)Zrmk(z?)$",
+ "^VBROADCAST(F|I)(32|64)X4Zrmk(z?)$",
+ "^VBROADCAST(F|I)64X2Zrmk(z?)$",
"^VBROADCASTS(D|S)Zrmk(z?)$",
"^VMOV(A|U)P(D|S)Zrmk(z?)$",
"^VMOV(D|SH|SL)DUPZrmk(z?)$",
@@ -2698,7 +2698,7 @@ def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup262], (instregex "^VBROADCAST(F|I)32X(2|4)Z256rmk(z?)$",
- "^VBROADCAST(F|I)64X2Z128rmk(z?)$",
+ "^VBROADCAST(F|I)64X2Z256rmk(z?)$",
"^VBROADCASTS(D|S)Z256rmk(z?)$",
"^VMOV(A|U)P(D|S)Z256rmk(z?)$",
"^VMOV(D|SH|SL)DUPZ256rmk(z?)$",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 649d38de185a80..daea3cdc1664b2 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1547,19 +1547,19 @@ def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
"VBROADCASTF32X2Z256rm(b?)",
"VBROADCASTF32X2Zrm(b?)",
"VBROADCASTF32X4Z256rm(b?)",
- "VBROADCASTF32X4rm(b?)",
- "VBROADCASTF32X8rm(b?)",
- "VBROADCASTF64X2Z128rm(b?)",
- "VBROADCASTF64X2rm(b?)",
- "VBROADCASTF64X4rm(b?)",
+ "VBROADCASTF32X4Zrm(b?)",
+ "VBROADCASTF32X8Zrm(b?)",
+ "VBROADCASTF64X2Z256rm(b?)",
+ "VBROADCASTF64X2Zrm(b?)",
+ "VBROADCASTF64X4Zrm(b?)",
"VBROADCASTI32X2Z256rm(b?)",
"VBROADCASTI32X2Zrm(b?)",
"VBROADCASTI32X4Z256rm(b?)",
- "VBROADCASTI32X4rm(b?)",
- "VBROADCASTI32X8rm(b?)",
- "VBROADCASTI64X2Z128rm(b?)",
- "VBROADCASTI64X2rm(b?)",
- "VBROADCASTI64X4rm(b?)",
+ "VBROADCASTI32X4Zrm(b?)",
+ "VBROADCASTI32X8Zrm(b?)",
+ "VBROADCASTI64X2Z256rm(b?)",
+ "VBROADCASTI64X2Zrm(b?)",
+ "VBROADCASTI64X4Zrm(b?)",
"VBROADCASTSD(Z|Z256)rm(b?)",
"VBROADCASTSS(Z|Z256)rm(b?)",
"VINSERTF32x4(Z|Z256)rm(b?)",
More information about the llvm-commits mailing list