[llvm] 0858c90 - [X86] Add missing register qualifier to the VBLENDVPD/VBLENDVPS/VPBLENDVB instruction names
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 11 08:48:51 PDT 2024
Author: Simon Pilgrim
Date: 2024-03-11T15:48:07Z
New Revision: 0858c906db008e02163e159158c082d9fc82dcca
URL: https://github.com/llvm/llvm-project/commit/0858c906db008e02163e159158c082d9fc82dcca
DIFF: https://github.com/llvm/llvm-project/commit/0858c906db008e02163e159158c082d9fc82dcca.diff
LOG: [X86] Add missing register qualifier to the VBLENDVPD/VBLENDVPS/VPBLENDVB instruction names
Matches the SSE variants (which have a 0 qualifier to indicate the explicit xmm0 dependency)
Added:
Modified:
llvm/lib/Target/X86/X86FastISel.cpp
llvm/lib/Target/X86/X86InstrSSE.td
llvm/lib/Target/X86/X86SchedAlderlakeP.td
llvm/lib/Target/X86/X86SchedSapphireRapids.td
llvm/test/TableGen/x86-fold-tables.inc
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 9f0b5f32df20a0..48d3b68b1823a5 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2230,7 +2230,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned CmpOpcode =
(RetVT == MVT::f32) ? X86::VCMPSSrri : X86::VCMPSDrri;
unsigned BlendOpcode =
- (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
+ (RetVT == MVT::f32) ? X86::VBLENDVPSrrr : X86::VBLENDVPDrrr;
Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
CC);
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 4a542b7e5a1bb0..69d45366a1dbce 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -6266,27 +6266,27 @@ multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT,
PatFrag mem_frag, SDNode OpNode,
X86FoldableSchedWrite sched> {
- def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
- SSEPackedInt>, TA, PD, VEX, VVVV,
- Sched<[sched]>;
+ def rrr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
+ SSEPackedInt>, TA, PD, VEX, VVVV,
+ Sched<[sched]>;
- def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, RC:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst,
- (OpNode RC:$src3, (mem_frag addr:$src2),
- RC:$src1))], SSEPackedInt>, TA, PD, VEX, VVVV,
- Sched<[sched.Folded, sched.ReadAfterFold,
- // x86memop:$src2
- ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault,
- // RC::$src3
- sched.ReadAfterFold]>;
+ def rmr : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpNode RC:$src3, (mem_frag addr:$src2),
+ RC:$src1))], SSEPackedInt>, TA, PD, VEX, VVVV,
+ Sched<[sched.Folded, sched.ReadAfterFold,
+ // x86memop:$src2
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC::$src3
+ sched.ReadAfterFold]>;
}
let Predicates = [HasAVX] in {
@@ -6320,16 +6320,16 @@ defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
(v4i32 VR128:$src2))),
- (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ (VBLENDVPSrrr VR128:$src2, VR128:$src1, VR128:$mask)>;
def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
(v2i64 VR128:$src2))),
- (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
+ (VBLENDVPDrrr VR128:$src2, VR128:$src1, VR128:$mask)>;
def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
(v8i32 VR256:$src2))),
- (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ (VBLENDVPSYrrr VR256:$src2, VR256:$src1, VR256:$mask)>;
def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
(v4i64 VR256:$src2))),
- (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ (VBLENDVPDYrrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}
// Prefer a movss or movsd over a blendps when optimizing for size. these were
diff --git a/llvm/lib/Target/X86/X86SchedAlderlakeP.td b/llvm/lib/Target/X86/X86SchedAlderlakeP.td
index 4dc5ea3c861125..6f9d2cf7ffdf47 100644
--- a/llvm/lib/Target/X86/X86SchedAlderlakeP.td
+++ b/llvm/lib/Target/X86/X86SchedAlderlakeP.td
@@ -2158,16 +2158,16 @@ def ADLPWriteResGroup244 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> {
let Latency = 9;
let NumMicroOps = 4;
}
-def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rm$")>;
-def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrm)>;
+def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rmr$")>;
+def : InstRW<[ADLPWriteResGroup244, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrmr)>;
def ADLPWriteResGroup245 : SchedWriteRes<[ADLPPort00_01_05]> {
let ReleaseAtCycles = [3];
let Latency = 3;
let NumMicroOps = 3;
}
-def : InstRW<[ADLPWriteResGroup245], (instregex "^VBLENDVP(D|S)rr$")>;
-def : InstRW<[ADLPWriteResGroup245], (instrs VPBLENDVBrr)>;
+def : InstRW<[ADLPWriteResGroup245], (instregex "^VBLENDVP(D|S)rrr$")>;
+def : InstRW<[ADLPWriteResGroup245], (instrs VPBLENDVBrrr)>;
def ADLPWriteResGroup246 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> {
let ReleaseAtCycles = [6, 7, 18];
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index 3c698d2c9f7a01..88bb9ad8f1d749 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -2673,25 +2673,25 @@ def SPRWriteResGroup259 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
let Latency = 10;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)Yrm$")>;
-def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBYrm)>;
+def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)Yrmr$")>;
+def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBYrmr)>;
def SPRWriteResGroup260 : SchedWriteRes<[SPRPort00_01_05]> {
let ReleaseAtCycles = [3];
let Latency = 3;
let NumMicroOps = 3;
}
-def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rr$",
- "^VBLENDVP(D|SY)rr$",
- "^VPBLENDVB(Y?)rr$")>;
+def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rrr$",
+ "^VBLENDVP(D|SY)rrr$",
+ "^VPBLENDVB(Y?)rrr$")>;
def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
let ReleaseAtCycles = [3, 1];
let Latency = 9;
let NumMicroOps = 4;
}
-def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rm$")>;
-def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrm)>;
+def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rmr$")>;
+def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrmr)>;
def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
let Latency = 9;
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index e0fccd42e47f73..eea4f87cae9cec 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -2363,10 +2363,10 @@ static const X86FoldTableEntry Table2[] = {
{X86::VBLENDPDrri, X86::VBLENDPDrmi, 0},
{X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0},
{X86::VBLENDPSrri, X86::VBLENDPSrmi, 0},
- {X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0},
- {X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0},
- {X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0},
- {X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0},
+ {X86::VBLENDVPDYrrr, X86::VBLENDVPDYrmr, 0},
+ {X86::VBLENDVPDrrr, X86::VBLENDVPDrmr, 0},
+ {X86::VBLENDVPSYrrr, X86::VBLENDVPSYrmr, 0},
+ {X86::VBLENDVPSrrr, X86::VBLENDVPSrmr, 0},
{X86::VBROADCASTF32X2Z256rrkz, X86::VBROADCASTF32X2Z256rmkz, TB_NO_REVERSE},
{X86::VBROADCASTF32X2Zrrkz, X86::VBROADCASTF32X2Zrmkz, TB_NO_REVERSE},
{X86::VBROADCASTI32X2Z128rrkz, X86::VBROADCASTI32X2Z128rmkz, TB_NO_REVERSE},
@@ -3042,8 +3042,8 @@ static const X86FoldTableEntry Table2[] = {
{X86::VPBLENDMWZ128rr, X86::VPBLENDMWZ128rm, 0},
{X86::VPBLENDMWZ256rr, X86::VPBLENDMWZ256rm, 0},
{X86::VPBLENDMWZrr, X86::VPBLENDMWZrm, 0},
- {X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0},
- {X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0},
+ {X86::VPBLENDVBYrrr, X86::VPBLENDVBYrmr, 0},
+ {X86::VPBLENDVBrrr, X86::VPBLENDVBrmr, 0},
{X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0},
{X86::VPBLENDWrri, X86::VPBLENDWrmi, 0},
{X86::VPBROADCASTBZ128rrkz, X86::VPBROADCASTBZ128rmkz, TB_NO_REVERSE},
More information about the llvm-commits mailing list