[llvm] [X86] X86FixupVectorConstants - shrink vector load to movsd/movss/movd/movq 'zero upper' instructions (PR #79000)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 22 08:05:43 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Changes:
If we're loading a vector constant that is known to be zero in the upper elements, attempt to shrink the constant pool entry and scalar-load just the lower 32/64 bits; the movss/movsd/movd/movq load forms implicitly zero the upper elements of the destination register, so the loaded value is unchanged.
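The legality test reduces to a leading-zero count on the raw constant bits. A minimal standalone sketch of that check, assuming llvm::APInt; the helper name here is hypothetical and not part of the patch:

```cpp
#include "llvm/ADT/APInt.h"

// Hypothetical helper (not in the patch): returns true if a constant whose
// raw bits are `Bits` can instead be loaded as a ScalarBitWidth-wide scalar,
// relying on the 'zero upper' semantics of movss/movsd/movd/movq to clear
// the remaining lanes.
static bool canUseZeroUpperLoad(const llvm::APInt &Bits,
                                unsigned ScalarBitWidth) {
  unsigned NumBits = Bits.getBitWidth();
  // Legal only if every bit above the scalar width is already zero, and
  // profitable only if the load actually shrinks.
  return NumBits > ScalarBitWidth &&
         Bits.countLeadingZeros() >= (NumBits - ScalarBitWidth);
}
```

For example, the 128-bit constant [4294967295,0,0,0] that appears in several of the test diffs below has 96 leading zero bits, so it passes this check at ScalarBitWidth == 32 and its movaps load becomes a movss.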
---
Patch is 137.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79000.diff
55 Files Affected:
- (modified) llvm/lib/Target/X86/X86FixupVectorConstants.cpp (+70-8)
- (modified) llvm/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx-load-store.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx2-arith.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/combine-srl.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/constant-pool-sharing.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/dpbusd.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/dpbusd_const.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/fcmp-constant.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/half.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/insert-into-constant-vector.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/insertelement-ones.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/memcmp.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr46532.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr63108.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/pr74736.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pshufb-mask-comments.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/ret-mmx.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/sad.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/shuffle-half.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll (+28-28)
- (modified) llvm/test/CodeGen/X86/vec_set-A.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-fshl-128.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/vector-fshl-256.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-256.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-fshr-128.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/vector-fshr-256.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-256.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-half-conversions.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-lzcnt-128.ll (+56-56)
- (modified) llvm/test/CodeGen/X86/vector-lzcnt-256.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/vector-rotate-128.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/vector-rotate-256.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll (+14-14)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-v1.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-tzcnt-128.ll (+14-14)
- (modified) llvm/test/CodeGen/X86/vselect-constants.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vselect-post-combine.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/widen_bitcnt.ll (+10-10)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg.ll (+32-32)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll (+6-6)
``````````diff
diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
index 9da8d7338ea36c..8467aff3376076 100644
--- a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
+++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
@@ -223,6 +223,33 @@ static Constant *rebuildSplatableConstant(const Constant *C,
return rebuildConstant(OriginalType->getContext(), SclTy, *Splat, NumSclBits);
}
+static Constant *rebuildZeroUpperConstant(const Constant *C,
+ unsigned ScalarBitWidth) {
+ Type *Ty = C->getType();
+ Type *SclTy = Ty->getScalarType();
+ unsigned NumBits = Ty->getPrimitiveSizeInBits();
+ unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
+ LLVMContext &Ctx = C->getContext();
+
+ if (NumBits > ScalarBitWidth) {
+ // Determine if the upper bits are all zero.
+ if (std::optional<APInt> Bits = extractConstantBits(C)) {
+ if (Bits->countLeadingZeros() >= (NumBits - ScalarBitWidth)) {
+ // If the original constant was made of smaller elements, try to retain
+ // those types.
+ if (ScalarBitWidth > NumSclBits && (ScalarBitWidth % NumSclBits) == 0)
+ return rebuildConstant(Ctx, SclTy, *Bits, NumSclBits);
+
+ // Fallback to raw integer bits.
+ APInt RawBits = Bits->zextOrTrunc(ScalarBitWidth);
+ return ConstantInt::get(Ctx, RawBits);
+ }
+ }
+ }
+
+ return nullptr;
+}
+
bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineInstr &MI) {
@@ -263,6 +290,34 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
return false;
};
+ auto ConvertToZeroUpper = [&](unsigned OpUpper64, unsigned OpUpper32,
+ unsigned OperandNo) {
+ assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
+
+ if (auto *C = X86::getConstantFromPool(MI, OperandNo)) {
+ // Attempt to detect a suitable 'zero upper' constant from increasing scalar widths.
+ std::pair<unsigned, unsigned> ZeroUppers[] = {
+ {32, OpUpper32},
+ {64, OpUpper64},
+ };
+ for (auto [BitWidth, OpUpper] : ZeroUppers) {
+ if (OpUpper) {
+ // Construct a suitable 'zero upper' constant and adjust the MI to
+ // use the new constant pool entry.
+ if (Constant *NewCst = rebuildZeroUpperConstant(C, BitWidth)) {
+ unsigned NewCPI =
+ CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8));
+ MI.setDesc(TII->get(OpUpper));
+ MI.getOperand(OperandNo + X86::AddrDisp).setIndex(NewCPI);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ };
+
// Attempt to convert full width vector loads into broadcast loads.
switch (Opc) {
/* FP Loads */
@@ -271,13 +326,14 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
case X86::MOVUPDrm:
case X86::MOVUPSrm:
// TODO: SSE3 MOVDDUP Handling
- return false;
+ return ConvertToZeroUpper(X86::MOVSDrm, X86::MOVSSrm, 1);
case X86::VMOVAPDrm:
case X86::VMOVAPSrm:
case X86::VMOVUPDrm:
case X86::VMOVUPSrm:
return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
- 1);
+ 1) ||
+ ConvertToZeroUpper(X86::VMOVSDrm, X86::VMOVSSrm, 1);
case X86::VMOVAPDYrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPDYrm:
@@ -289,7 +345,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
case X86::VMOVUPDZ128rm:
case X86::VMOVUPSZ128rm:
return ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm,
- X86::VBROADCASTSSZ128rm, 0, 0, 1);
+ X86::VBROADCASTSSZ128rm, 0, 0, 1) ||
+ ConvertToZeroUpper(X86::VMOVSDZrm, X86::VMOVSSZrm, 1);
case X86::VMOVAPDZ256rm:
case X86::VMOVAPSZ256rm:
case X86::VMOVUPDZ256rm:
@@ -305,13 +362,17 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0,
1);
/* Integer Loads */
+ case X86::MOVDQArm:
+ case X86::MOVDQUrm:
+ return ConvertToZeroUpper(X86::MOVQI2PQIrm, X86::MOVDI2PDIrm, 1);
case X86::VMOVDQArm:
case X86::VMOVDQUrm:
return ConvertToBroadcast(
- 0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm,
- HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm,
- HasAVX2 ? X86::VPBROADCASTWrm : 0, HasAVX2 ? X86::VPBROADCASTBrm : 0,
- 1);
+ 0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm,
+ HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm,
+ HasAVX2 ? X86::VPBROADCASTWrm : 0,
+ HasAVX2 ? X86::VPBROADCASTBrm : 0, 1) ||
+ ConvertToZeroUpper(X86::VMOVQI2PQIrm, X86::VMOVDI2PDIrm, 1);
case X86::VMOVDQAYrm:
case X86::VMOVDQUYrm:
return ConvertToBroadcast(
@@ -327,7 +388,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
return ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm,
X86::VPBROADCASTDZ128rm,
HasBWI ? X86::VPBROADCASTWZ128rm : 0,
- HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1);
+ HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1) ||
+ ConvertToZeroUpper(X86::VMOVQI2PQIZrm, X86::VMOVDI2PDIZrm, 1);
case X86::VMOVDQA32Z256rm:
case X86::VMOVDQA64Z256rm:
case X86::VMOVDQU32Z256rm:
diff --git a/llvm/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll b/llvm/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
index e4e5d51d272d61..1ae51ee9756388 100644
--- a/llvm/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
+++ b/llvm/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
@@ -7,7 +7,7 @@
define void @ui_to_fp_conv(ptr nocapture %aFOO, ptr nocapture %RET) nounwind {
; CHECK-LABEL: ui_to_fp_conv:
; CHECK: # %bb.0: # %allocas
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,1.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,1.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: movups %xmm1, 16(%rsi)
; CHECK-NEXT: movups %xmm0, (%rsi)
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index 33eb704788740a..3f856d33145d86 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -220,7 +220,7 @@ define void @f_f() nounwind {
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB9_4
; CHECK-NEXT: # %bb.3: # %cif_mixed_test_all
-; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,0,0,0]
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = [4294967295,0,0,0]
; CHECK-NEXT: vmaskmovps %ymm0, %ymm0, (%rax)
; CHECK-NEXT: .LBB9_4: # %cif_mixed_test_any_check
;
diff --git a/llvm/test/CodeGen/X86/avx2-arith.ll b/llvm/test/CodeGen/X86/avx2-arith.ll
index 3e695811719448..d32143cf33f2fa 100644
--- a/llvm/test/CodeGen/X86/avx2-arith.ll
+++ b/llvm/test/CodeGen/X86/avx2-arith.ll
@@ -234,7 +234,7 @@ define <8 x i16> @mul_const8(<8 x i16> %x) {
define <8 x i32> @mul_const9(<8 x i32> %x) {
; CHECK-LABEL: mul_const9:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [2,0,0,0]
+; CHECK-NEXT: vmovd {{.*#+}} xmm1 = [2,0,0,0]
; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll
index 3e0f581ea01a0a..125196a0819b61 100644
--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll
@@ -356,7 +356,7 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_lzcnt_bit1:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; SSE-NEXT: movq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: pshufb %xmm0, %xmm2
; SSE-NEXT: psrlw $4, %xmm0
@@ -378,7 +378,7 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
; AVX-LABEL: combine_vec_lshr_lzcnt_bit1:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX-NEXT: vmovq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
diff --git a/llvm/test/CodeGen/X86/constant-pool-sharing.ll b/llvm/test/CodeGen/X86/constant-pool-sharing.ll
index db5a7810974c9d..062d87ed035fd6 100644
--- a/llvm/test/CodeGen/X86/constant-pool-sharing.ll
+++ b/llvm/test/CodeGen/X86/constant-pool-sharing.ll
@@ -77,7 +77,7 @@ define void @store_repeated_constants(ptr %lo, ptr %hi) {
; SSE-LINUX: # %bb.0:
; SSE-LINUX-NEXT: xorps %xmm0, %xmm0
; SSE-LINUX-NEXT: movaps %xmm0, 48(%rdi)
-; SSE-LINUX-NEXT: movaps {{.*#+}} xmm1 = [18446744073709551615,0]
+; SSE-LINUX-NEXT: movsd {{.*#+}} xmm1 = [18446744073709551615,0]
; SSE-LINUX-NEXT: movaps %xmm1, 32(%rdi)
; SSE-LINUX-NEXT: movaps %xmm1, 16(%rdi)
; SSE-LINUX-NEXT: movaps %xmm1, (%rdi)
@@ -92,7 +92,7 @@ define void @store_repeated_constants(ptr %lo, ptr %hi) {
; SSE-MSVC: # %bb.0:
; SSE-MSVC-NEXT: xorps %xmm0, %xmm0
; SSE-MSVC-NEXT: movaps %xmm0, 48(%rcx)
-; SSE-MSVC-NEXT: movaps {{.*#+}} xmm1 = [18446744073709551615,0]
+; SSE-MSVC-NEXT: movsd {{.*#+}} xmm1 = [18446744073709551615,0]
; SSE-MSVC-NEXT: movaps %xmm1, 32(%rcx)
; SSE-MSVC-NEXT: movaps %xmm1, 16(%rcx)
; SSE-MSVC-NEXT: movaps %xmm1, (%rcx)
diff --git a/llvm/test/CodeGen/X86/dpbusd.ll b/llvm/test/CodeGen/X86/dpbusd.ll
index 06f11e6d527bde..fbea08eb1e5502 100644
--- a/llvm/test/CodeGen/X86/dpbusd.ll
+++ b/llvm/test/CodeGen/X86/dpbusd.ll
@@ -379,7 +379,7 @@ define i32 @vpdpbusd_2xi32(ptr%a, ptr%b, i32 %c, i32 %n) {
; AVX512VNNI-LABEL: vpdpbusd_2xi32:
; AVX512VNNI: # %bb.0: # %entry
; AVX512VNNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512VNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,0,0,0]
+; AVX512VNNI-NEXT: vmovd {{.*#+}} xmm1 = [65535,0,0,0]
; AVX512VNNI-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VNNI-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX512VNNI-NEXT: vpandq %zmm1, %zmm2, %zmm1
diff --git a/llvm/test/CodeGen/X86/dpbusd_const.ll b/llvm/test/CodeGen/X86/dpbusd_const.ll
index 2fe9feb06dff67..5862e614265b1f 100644
--- a/llvm/test/CodeGen/X86/dpbusd_const.ll
+++ b/llvm/test/CodeGen/X86/dpbusd_const.ll
@@ -108,7 +108,7 @@ define i32 @mul_4xi8_cs(<4 x i8> %a, i32 %c) {
; AVXVNNI: # %bb.0: # %entry
; AVXVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVXVNNI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVXVNNI-NEXT: vmovd {{.*#+}} xmm2 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
; AVXVNNI-NEXT: {vex} vpdpbusd %xmm0, %xmm2, %xmm1
; AVXVNNI-NEXT: vmovd %xmm1, %eax
; AVXVNNI-NEXT: addl %edi, %eax
@@ -118,7 +118,7 @@ define i32 @mul_4xi8_cs(<4 x i8> %a, i32 %c) {
; AVX512VNNI: # %bb.0: # %entry
; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX512VNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VNNI-NEXT: vmovd {{.*#+}} xmm1 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VNNI-NEXT: vpdpbusd %zmm0, %zmm1, %zmm2
; AVX512VNNI-NEXT: vmovd %xmm2, %eax
@@ -130,7 +130,7 @@ define i32 @mul_4xi8_cs(<4 x i8> %a, i32 %c) {
; AVX512VLVNNI: # %bb.0: # %entry
; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VLVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX512VLVNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVNNI-NEXT: vmovd {{.*#+}} xmm1 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VLVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLVNNI-NEXT: vpdpbusd %xmm0, %xmm1, %xmm2
; AVX512VLVNNI-NEXT: vmovd %xmm2, %eax
diff --git a/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll
index 3b015acb69bd2e..0a52dfff71eda4 100644
--- a/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll
@@ -532,7 +532,7 @@ define <2 x half> @vfptrunc_v2f16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroex
; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
; AVX512-NEXT: callq __truncdfhf2 at PLT
; AVX512-NEXT: vpbroadcastw %xmm0, %xmm1
-; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4,0,0,0]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = [4,0,0,0]
; AVX512-NEXT: vpermi2ps (%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
; AVX512-NEXT: addq $40, %rsp
; AVX512-NEXT: .cfi_def_cfa_offset 8
diff --git a/llvm/test/CodeGen/X86/fcmp-constant.ll b/llvm/test/CodeGen/X86/fcmp-constant.ll
index 481a32b39dd377..335cb28213f929 100644
--- a/llvm/test/CodeGen/X86/fcmp-constant.ll
+++ b/llvm/test/CodeGen/X86/fcmp-constant.ll
@@ -92,7 +92,7 @@ define <2 x i64> @fcmp_ueq_v2f64_undef() {
define <2 x i64> @fcmp_ueq_v2f64_undef_elt() {
; CHECK-LABEL: fcmp_ueq_v2f64_undef_elt:
; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,0]
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [18446744073709551615,0]
; CHECK-NEXT: retq
%1 = fcmp ueq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
%2 = sext <2 x i1> %1 to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index b42f6fdea34b65..d0853fdc748d29 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -2116,7 +2116,7 @@ define void @pr63114() {
; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm0
-; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
+; CHECK-LIBCALL-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
; CHECK-LIBCALL-NEXT: por %xmm2, %xmm0
; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm0
@@ -2181,7 +2181,7 @@ define void @pr63114() {
; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
; CHECK-I686-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
; CHECK-I686-NEXT: pand %xmm1, %xmm0
-; CHECK-I686-NEXT: movdqa {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
+; CHECK-I686-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
; CHECK-I686-NEXT: por %xmm2, %xmm0
; CHECK-I686-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
; CHECK-I686-NEXT: pand %xmm3, %xmm0
diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
index 07787f3851bd90..05f81825d18051 100644
--- a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
+++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
@@ -375,7 +375,7 @@ define <8 x i64> @elt5_v8i64(i64 %x) {
; X86-SSE-LABEL: elt5_v8i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [4,0,0,0]
+; X86-SSE-NEXT: movss {{.*#+}} xmm2 = [4,0,0,0]
; X86-SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X86-SSE-NEXT: movaps {{.*#+}} xmm0 = [42,0,1,0]
; X86-SSE-NEXT: movaps {{.*#+}} xmm1 = [2,0,3,0]
@@ -404,7 +404,7 @@ define <8 x i64> @elt5_v8i64(i64 %x) {
; X86-AVX1-LABEL: elt5_v8i64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [4,0,0,0]
+; X86-AVX1-NEXT: vmovss {{.*#+}} xmm1 = [4,0,0,0]
; X86-AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X86-AVX1-NEXT: vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
; X86-AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
@@ -421,7 +421,7 @@ define <8 x i64> @elt5_v8i64(i64 %x) {
; X86-AVX2-LABEL: elt5_v8i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX2-NEXT: vmovaps {{.*#+}} xmm1 = [4,0,0,0]
+; X86-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [4,0,0,0]
; X86-AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X86-AVX2-NEXT: vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
; X86-AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
@@ -439,7 +439,7 @@ define <8 x i64> @elt5_v8i64(i64 %x) {
; X86-AVX512F: # %bb.0:
; X86-AVX512F-NEXT: vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; X86-AVX512F-NEXT: vmovaps {{.*#+}} xmm2 = [4,0,0,0]
+; X86-AVX512F-NEXT: vmovss {{.*#+}} xmm2 = [4,0,0,0]
; X86-AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X86-AVX512F-NEXT: vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll
index ed7cbb0a8430d9..cfcb8798e0b9cc 100644
--- a/llvm/test/CodeGen/X86/insertelement-ones.ll
+++ b/llvm/test/CodeGen/X86/insertelement-ones.ll
@@ -280,7 +280,7 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
;
; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [65535,0,0,0]
+; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [65535,0,0,0]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
@@ -385,7 +385,7 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
;
; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,0,0]
+; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [255,0,0,0]
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
index f63545bcd178e5..aad1b443448503 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
@@ -739,7 +739,7 @@ define <17 x float> @test_mgather_v17f32(ptr %base, <17 x i32> %index)
; WIDEN_AVX2-NEXT: vgatherdps %ymm5, (%rsi,%ymm1,4), %ymm6
; WIDEN_AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; WIDEN_AVX2-NEXT: vgatherdps %ymm3, (%rsi,%ymm0,4), %ymm1
-; WIDEN_AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,0,0,0]
+; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = [4294967295,0,0,0]
; WIDEN_AVX2-NEXT: vgatherdps %ymm0, (%rsi,%ymm2,4), %ymm4
; WIDEN_AVX2-NEXT: vmovss %xmm4, 64(%rdi)
; WIDEN_AVX2-NEXT: vmovaps %ymm1, 32(%rdi)
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index 6eb02bfc1fd0c3..da46ea40655791 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -947,7 +947,7 @@ define i1 @length24_eq_const(ptr %X) nounwind {
; X64-MIC-AVX: # %bb.0:
; X64-MIC-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [959985462,858927408,0,0]
+; X64-MIC-AVX-NEXT: vmovq {{.*#+}} xmm2 = [959985462,858927408,0,0]
; X64-MIC-AVX-NEXT: vpcmpneqd %zmm2, %zmm1, %k0
; X64-MIC-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [858927408,926299444,825243960,892613426]
; X64-MIC-AVX-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/C...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/79000
More information about the llvm-commits mailing list