[llvm] [X86] X86FixupVectorConstants - load+zero vector constants that can be stored in a truncated form (PR #80428)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 04:59:49 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Further develops the vsextload support added in #<!-- -->79815 - reduces the size of the vector constant by storing it in the constant pool in a truncated form, and zero-extending it as part of the load.
---
Patch is 224.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80428.diff
68 Files Affected:
- (modified) llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp (+12-6)
- (modified) llvm/lib/Target/X86/X86FixupVectorConstants.cpp (+46-6)
- (modified) llvm/lib/Target/X86/X86MCInstLower.cpp (+65-15)
- (modified) llvm/test/CodeGen/X86/avx2-vector-shifts.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/combine-mul.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/combine-srl.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/i64-to-float.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/icmp-abs-C-vec.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/icmp-pow2-mask.ll (+27-13)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_usat.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-128.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pmul.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/pr62014.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/psubus.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/sadd_sat_vec.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/sext-vsetcc.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/sse41.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/ssub_sat_vec.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/var-permute-256.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vec_cmp_sint-128.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vec_compare-sse4.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vec_minmax_sint.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/vec_saddo.ll (+68-22)
- (modified) llvm/test/CodeGen/X86/vec_setcc-2.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vec_smulo.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vec_ssubo.ll (+80-26)
- (modified) llvm/test/CodeGen/X86/vec_umulo.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-bo-select.ll (+40-40)
- (modified) llvm/test/CodeGen/X86/vector-fshl-256.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-128.ll (+19-10)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-256.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-fshr-128.ll (+30-15)
- (modified) llvm/test/CodeGen/X86/vector-fshr-256.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-128.ll (+27-13)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-256.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-mul.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/vector-reduce-add-mask.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-reduce-smax.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-reduce-smin.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll (+55-27)
- (modified) llvm/test/CodeGen/X86/vector-rotate-128.ll (+19-10)
- (modified) llvm/test/CodeGen/X86/vector-rotate-256.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-sext.ll (+135-55)
- (modified) llvm/test/CodeGen/X86/vector-shift-shl-128.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shift-shl-256.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll (+46-14)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-trunc-math.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-trunc-packus.ll (+44-49)
- (modified) llvm/test/CodeGen/X86/vector-trunc-ssat.ll (+28-28)
- (modified) llvm/test/CodeGen/X86/vector-trunc-usat.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/vector-trunc.ll (+83-41)
- (modified) llvm/test/CodeGen/X86/vector-unsigned-cmp.ll (+140-68)
- (modified) llvm/test/CodeGen/X86/vselect-pcmp.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vselect-post-combine.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg.ll (+156-82)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 619328af12719..e8a044b82eb80 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -1318,7 +1318,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_PMOVZX(PMOVZXBW, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
[[fallthrough]];
- CASE_PMOVZX(PMOVZXBW, m)
+ CASE_MASK_PMOVZX(PMOVZXBW, m)
+ CASE_MASKZ_PMOVZX(PMOVZXBW, m)
DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), false,
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1327,7 +1328,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_PMOVZX(PMOVZXBD, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
[[fallthrough]];
- CASE_PMOVZX(PMOVZXBD, m)
+ CASE_MASK_PMOVZX(PMOVZXBD, m)
+ CASE_MASKZ_PMOVZX(PMOVZXBD, m)
DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), false,
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1336,7 +1338,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_PMOVZX(PMOVZXBQ, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
[[fallthrough]];
- CASE_PMOVZX(PMOVZXBQ, m)
+ CASE_MASK_PMOVZX(PMOVZXBQ, m)
+ CASE_MASKZ_PMOVZX(PMOVZXBQ, m)
DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), false,
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1345,7 +1348,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_PMOVZX(PMOVZXWD, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
[[fallthrough]];
- CASE_PMOVZX(PMOVZXWD, m)
+ CASE_MASK_PMOVZX(PMOVZXWD, m)
+ CASE_MASKZ_PMOVZX(PMOVZXWD, m)
DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), false,
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1354,7 +1358,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_PMOVZX(PMOVZXWQ, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
[[fallthrough]];
- CASE_PMOVZX(PMOVZXWQ, m)
+ CASE_MASK_PMOVZX(PMOVZXWQ, m)
+ CASE_MASKZ_PMOVZX(PMOVZXWQ, m)
DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), false,
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
@@ -1363,7 +1368,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_PMOVZX(PMOVZXDQ, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
[[fallthrough]];
- CASE_PMOVZX(PMOVZXDQ, m)
+ CASE_MASK_PMOVZX(PMOVZXDQ, m)
+ CASE_MASKZ_PMOVZX(PMOVZXDQ, m)
DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), false,
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
index 5917c1497d80e..f65fa5a2298ac 100644
--- a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
+++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
@@ -299,6 +299,10 @@ static Constant *rebuildSExtCst(const Constant *C, unsigned NumElts,
unsigned SrcEltBitWidth) {
return rebuildExtCst(C, true, NumElts, SrcEltBitWidth);
}
+static Constant *rebuildZExtCst(const Constant *C, unsigned NumElts,
+ unsigned SrcEltBitWidth) {
+ return rebuildExtCst(C, false, NumElts, SrcEltBitWidth);
+}
bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -416,13 +420,19 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
case X86::MOVDQUrm: {
FixupEntry Fixups[] = {
{HasSSE41 ? X86::PMOVSXBQrm : 0, 2, 8, rebuildSExtCst},
+ {HasSSE41 ? X86::PMOVZXBQrm : 0, 2, 8, rebuildZExtCst},
{X86::MOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
{HasSSE41 ? X86::PMOVSXBDrm : 0, 4, 8, rebuildSExtCst},
+ {HasSSE41 ? X86::PMOVZXBDrm : 0, 4, 8, rebuildZExtCst},
{HasSSE41 ? X86::PMOVSXWQrm : 0, 2, 16, rebuildSExtCst},
+ {HasSSE41 ? X86::PMOVZXWQrm : 0, 2, 16, rebuildZExtCst},
{X86::MOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
{HasSSE41 ? X86::PMOVSXBWrm : 0, 8, 8, rebuildSExtCst},
+ {HasSSE41 ? X86::PMOVZXBWrm : 0, 8, 8, rebuildZExtCst},
{HasSSE41 ? X86::PMOVSXWDrm : 0, 4, 16, rebuildSExtCst},
- {HasSSE41 ? X86::PMOVSXDQrm : 0, 2, 32, rebuildSExtCst}};
+ {HasSSE41 ? X86::PMOVZXWDrm : 0, 4, 16, rebuildZExtCst},
+ {HasSSE41 ? X86::PMOVSXDQrm : 0, 2, 32, rebuildSExtCst},
+ {HasSSE41 ? X86::PMOVZXDQrm : 0, 2, 32, rebuildZExtCst}};
return FixupConstant(Fixups, 1);
}
case X86::VMOVDQArm:
@@ -431,17 +441,23 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
{HasAVX2 ? X86::VPBROADCASTBrm : 0, 1, 8, rebuildSplatCst},
{HasAVX2 ? X86::VPBROADCASTWrm : 0, 1, 16, rebuildSplatCst},
{X86::VPMOVSXBQrm, 2, 8, rebuildSExtCst},
+ {X86::VPMOVZXBQrm, 2, 8, rebuildZExtCst},
{X86::VMOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
{HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 1, 32,
rebuildSplatCst},
{X86::VPMOVSXBDrm, 4, 8, rebuildSExtCst},
+ {X86::VPMOVZXBDrm, 4, 8, rebuildZExtCst},
{X86::VPMOVSXWQrm, 2, 16, rebuildSExtCst},
+ {X86::VPMOVZXWQrm, 2, 16, rebuildZExtCst},
{X86::VMOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
{HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 1, 64,
rebuildSplatCst},
{X86::VPMOVSXBWrm, 8, 8, rebuildSExtCst},
+ {X86::VPMOVZXBWrm, 8, 8, rebuildZExtCst},
{X86::VPMOVSXWDrm, 4, 16, rebuildSExtCst},
- {X86::VPMOVSXDQrm, 2, 32, rebuildSExtCst}};
+ {X86::VPMOVZXWDrm, 4, 16, rebuildZExtCst},
+ {X86::VPMOVSXDQrm, 2, 32, rebuildSExtCst},
+ {X86::VPMOVZXDQrm, 2, 32, rebuildZExtCst}};
return FixupConstant(Fixups, 1);
}
case X86::VMOVDQAYrm:
@@ -452,15 +468,21 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
{HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 1, 32,
rebuildSplatCst},
{HasAVX2 ? X86::VPMOVSXBQYrm : 0, 4, 8, rebuildSExtCst},
+ {HasAVX2 ? X86::VPMOVZXBQYrm : 0, 4, 8, rebuildZExtCst},
{HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 1, 64,
rebuildSplatCst},
{HasAVX2 ? X86::VPMOVSXBDYrm : 0, 8, 8, rebuildSExtCst},
+ {HasAVX2 ? X86::VPMOVZXBDYrm : 0, 8, 8, rebuildZExtCst},
{HasAVX2 ? X86::VPMOVSXWQYrm : 0, 4, 16, rebuildSExtCst},
+ {HasAVX2 ? X86::VPMOVZXWQYrm : 0, 4, 16, rebuildZExtCst},
{HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 1, 128,
rebuildSplatCst},
{HasAVX2 ? X86::VPMOVSXBWYrm : 0, 16, 8, rebuildSExtCst},
+ {HasAVX2 ? X86::VPMOVZXBWYrm : 0, 16, 8, rebuildZExtCst},
{HasAVX2 ? X86::VPMOVSXWDYrm : 0, 8, 16, rebuildSExtCst},
- {HasAVX2 ? X86::VPMOVSXDQYrm : 0, 4, 32, rebuildSExtCst}};
+ {HasAVX2 ? X86::VPMOVZXWDYrm : 0, 8, 16, rebuildZExtCst},
+ {HasAVX2 ? X86::VPMOVSXDQYrm : 0, 4, 32, rebuildSExtCst},
+ {HasAVX2 ? X86::VPMOVZXDQYrm : 0, 4, 32, rebuildZExtCst}};
return FixupConstant(Fixups, 1);
}
case X86::VMOVDQA32Z128rm:
@@ -471,15 +493,21 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
{HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1, 8, rebuildSplatCst},
{HasBWI ? X86::VPBROADCASTWZ128rm : 0, 1, 16, rebuildSplatCst},
{X86::VPMOVSXBQZ128rm, 2, 8, rebuildSExtCst},
+ {X86::VPMOVZXBQZ128rm, 2, 8, rebuildZExtCst},
{X86::VMOVDI2PDIZrm, 1, 32, rebuildZeroUpperCst},
{X86::VPBROADCASTDZ128rm, 1, 32, rebuildSplatCst},
{X86::VPMOVSXBDZ128rm, 4, 8, rebuildSExtCst},
+ {X86::VPMOVZXBDZ128rm, 4, 8, rebuildZExtCst},
{X86::VPMOVSXWQZ128rm, 2, 16, rebuildSExtCst},
+ {X86::VPMOVZXWQZ128rm, 2, 16, rebuildZExtCst},
{X86::VMOVQI2PQIZrm, 1, 64, rebuildZeroUpperCst},
{X86::VPBROADCASTQZ128rm, 1, 64, rebuildSplatCst},
{HasBWI ? X86::VPMOVSXBWZ128rm : 0, 8, 8, rebuildSExtCst},
+ {HasBWI ? X86::VPMOVZXBWZ128rm : 0, 8, 8, rebuildZExtCst},
{X86::VPMOVSXWDZ128rm, 4, 16, rebuildSExtCst},
- {X86::VPMOVSXDQZ128rm, 2, 32, rebuildSExtCst}};
+ {X86::VPMOVZXWDZ128rm, 4, 16, rebuildZExtCst},
+ {X86::VPMOVSXDQZ128rm, 2, 32, rebuildSExtCst},
+ {X86::VPMOVZXDQZ128rm, 2, 32, rebuildZExtCst}};
return FixupConstant(Fixups, 1);
}
case X86::VMOVDQA32Z256rm:
@@ -491,13 +519,19 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
{HasBWI ? X86::VPBROADCASTWZ256rm : 0, 1, 16, rebuildSplatCst},
{X86::VPBROADCASTDZ256rm, 1, 32, rebuildSplatCst},
{X86::VPMOVSXBQZ256rm, 4, 8, rebuildSExtCst},
+ {X86::VPMOVZXBQZ256rm, 4, 8, rebuildZExtCst},
{X86::VPBROADCASTQZ256rm, 1, 64, rebuildSplatCst},
{X86::VPMOVSXBDZ256rm, 8, 8, rebuildSExtCst},
+ {X86::VPMOVZXBDZ256rm, 8, 8, rebuildZExtCst},
{X86::VPMOVSXWQZ256rm, 4, 16, rebuildSExtCst},
+ {X86::VPMOVZXWQZ256rm, 4, 16, rebuildZExtCst},
{X86::VBROADCASTI32X4Z256rm, 1, 128, rebuildSplatCst},
{HasBWI ? X86::VPMOVSXBWZ256rm : 0, 16, 8, rebuildSExtCst},
+ {HasBWI ? X86::VPMOVZXBWZ256rm : 0, 16, 8, rebuildZExtCst},
{X86::VPMOVSXWDZ256rm, 8, 16, rebuildSExtCst},
- {X86::VPMOVSXDQZ256rm, 4, 32, rebuildSExtCst}};
+ {X86::VPMOVZXWDZ256rm, 8, 16, rebuildZExtCst},
+ {X86::VPMOVSXDQZ256rm, 4, 32, rebuildSExtCst},
+ {X86::VPMOVZXDQZ256rm, 4, 32, rebuildZExtCst}};
return FixupConstant(Fixups, 1);
}
case X86::VMOVDQA32Zrm:
@@ -510,13 +544,19 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
{X86::VPBROADCASTDZrm, 1, 32, rebuildSplatCst},
{X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst},
{X86::VPMOVSXBQZrm, 8, 8, rebuildSExtCst},
+ {X86::VPMOVZXBQZrm, 8, 8, rebuildZExtCst},
{X86::VBROADCASTI32X4rm, 1, 128, rebuildSplatCst},
{X86::VPMOVSXBDZrm, 16, 8, rebuildSExtCst},
+ {X86::VPMOVZXBDZrm, 16, 8, rebuildZExtCst},
{X86::VPMOVSXWQZrm, 8, 16, rebuildSExtCst},
+ {X86::VPMOVZXWQZrm, 8, 16, rebuildZExtCst},
{X86::VBROADCASTI64X4rm, 1, 256, rebuildSplatCst},
{HasBWI ? X86::VPMOVSXBWZrm : 0, 32, 8, rebuildSExtCst},
+ {HasBWI ? X86::VPMOVZXBWZrm : 0, 32, 8, rebuildZExtCst},
{X86::VPMOVSXWDZrm, 16, 16, rebuildSExtCst},
- {X86::VPMOVSXDQZrm, 8, 32, rebuildSExtCst}};
+ {X86::VPMOVZXWDZrm, 16, 16, rebuildZExtCst},
+ {X86::VPMOVSXDQZrm, 8, 32, rebuildSExtCst},
+ {X86::VPMOVZXDQZrm, 8, 32, rebuildZExtCst}};
return FixupConstant(Fixups, 1);
}
}
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 1a26489460882..45dd98cde6fa1 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1388,6 +1388,18 @@ PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
return MBBI;
}
+static unsigned getRegisterWidth(const MCOperandInfo &Info) {
+ if (Info.RegClass == X86::VR128RegClassID ||
+ Info.RegClass == X86::VR128XRegClassID)
+ return 128;
+ if (Info.RegClass == X86::VR256RegClassID ||
+ Info.RegClass == X86::VR256XRegClassID)
+ return 256;
+ if (Info.RegClass == X86::VR512RegClassID)
+ return 512;
+ llvm_unreachable("Unknown register class!");
+}
+
static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
unsigned SrcOp2Idx, ArrayRef<int> Mask) {
std::string Comment;
@@ -1582,8 +1594,8 @@ static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
}
}
-static bool printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
- int SrcEltBits, int DstEltBits) {
+static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
+ int SrcEltBits, int DstEltBits, bool IsSext) {
auto *C = X86::getConstantFromPool(*MI, 1);
if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) {
if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
@@ -1598,7 +1610,8 @@ static bool printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
if (i != 0)
CS << ",";
if (CDS->getElementType()->isIntegerTy()) {
- APInt Elt = CDS->getElementAsAPInt(i).sext(DstEltBits);
+ APInt Elt = CDS->getElementAsAPInt(i);
+ Elt = IsSext ? Elt.sext(DstEltBits) : Elt.zext(DstEltBits);
printConstant(Elt, CS);
} else
CS << "?";
@@ -1611,6 +1624,36 @@ static bool printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
return false;
}
+static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
+ int SrcEltBits, int DstEltBits) {
+ printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, true);
+}
+static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
+ int SrcEltBits, int DstEltBits) {
+ if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, false))
+ return;
+
+ // We didn't find a constant load, fallback to a shuffle mask decode.
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+
+ const MachineOperand &DstOp = MI->getOperand(0);
+ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+
+ unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
+ assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 &&
+ "Illegal extension ratio");
+ unsigned NumElts = Width / DstEltBits;
+ unsigned Scale = DstEltBits / SrcEltBits;
+ for (unsigned I = 0; I != NumElts; ++I) {
+ if (I != 0)
+ CS << ",";
+ CS << "mem[" << I << "]";
+ for (unsigned S = 1; S != Scale; ++S)
+ CS << ",zero";
+ }
+ OutStreamer.AddComment(CS.str());
+}
void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
@@ -1688,18 +1731,6 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
}
}
-static unsigned getRegisterWidth(const MCOperandInfo &Info) {
- if (Info.RegClass == X86::VR128RegClassID ||
- Info.RegClass == X86::VR128XRegClassID)
- return 128;
- if (Info.RegClass == X86::VR256RegClassID ||
- Info.RegClass == X86::VR256XRegClassID)
- return 256;
- if (Info.RegClass == X86::VR512RegClassID)
- return 512;
- llvm_unreachable("Unknown register class!");
-}
-
static void addConstantComments(const MachineInstr *MI,
MCStreamer &OutStreamer) {
switch (MI->getOpcode()) {
@@ -2039,6 +2070,25 @@ static void addConstantComments(const MachineInstr *MI,
CASE_MOVX_RM(SX, WQ)
printSignExtend(MI, OutStreamer, 16, 64);
break;
+
+ CASE_MOVX_RM(ZX, BD)
+ printZeroExtend(MI, OutStreamer, 8, 32);
+ break;
+ CASE_MOVX_RM(ZX, BQ)
+ printZeroExtend(MI, OutStreamer, 8, 64);
+ break;
+ CASE_MOVX_RM(ZX, BW)
+ printZeroExtend(MI, OutStreamer, 8, 16);
+ break;
+ CASE_MOVX_RM(ZX, DQ)
+ printZeroExtend(MI, OutStreamer, 32, 64);
+ break;
+ CASE_MOVX_RM(ZX, WD)
+ printZeroExtend(MI, OutStreamer, 16, 32);
+ break;
+ CASE_MOVX_RM(ZX, WQ)
+ printZeroExtend(MI, OutStreamer, 16, 64);
+ break;
}
}
diff --git a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
index 4bf2e2456482e..983c69d1a1c2e 100644
--- a/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/llvm/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -60,14 +60,14 @@ define <8 x i32> @test_vpslld_var(i32 %shift) {
; X86-LABEL: test_vpslld_var:
; X86: # %bb.0:
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: vpmovsxwd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
+; X86-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
; X86-NEXT: vpslld %xmm0, %ymm1, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: test_vpslld_var:
; X64: # %bb.0:
; X64-NEXT: vmovd %edi, %xmm0
-; X64-NEXT: vpmovsxwd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
+; X64-NEXT: vpmovzxbd {{.*#+}} ymm1 = [192,193,194,195,196,197,198,199]
; X64-NEXT: vpslld %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%amt = insertelement <8 x i32> undef, i32 %shift, i32 0
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index a16659eab9763..6255621d870e1 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -108,7 +108,7 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
@@ -117,7 +117,7 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -268,7 +268,7 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vpmovsxwd {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -445,7 +445,7 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
; AVX2-NEXT: vpmovsxbq {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vpmovsxwq {{.*#+}} ymm2 = [16,32,64,128]
+; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -505,10 +505,10 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm1
-; AVX2-NEXT: vpmovsxwd {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index 48abed8b6f222..bc8964f30938b 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -134,7 +134,7 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/80428
More information about the llvm-commits
mailing list