[llvm] [X86] X86FixupVectorConstants - load+sign-extend vector constants that can be stored in a truncated form (PR #79815)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 29 04:09:48 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Reduce the size of the vector constant by storing it in the constant pool in a truncated form, and sign-extend it as part of the load.
I've extended the existing FixupConstant functionality to support these sext constant rebuilds: we still select the smallest stored constant entry, and for the same stored bitwidth we prefer vzload/broadcast/vextload to avoid domain flips.
I intend to add the matching load+zero-extend handling in a future PR, but that first requires some alterations to the existing MC shuffle-comment handling.
NOTE: Some of the FixupConstant tables are currently built on the fly because they depend on the supported ISAs (HasAVX2 etc.). Should we split these (to allow initializer lists instead) and use duplicate FixupConstant calls to avoid the additional stack use?
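To illustrate the idea (a minimal sketch with my own naming, not code from the patch): a wide constant element only qualifies for the truncated + sign-extended form if its signed value is already representable in the narrower source width, which is the check the patch performs via `APInt::getSignificantBits`:

```cpp
// Hypothetical helper (not part of the patch): decide whether every element of
// a constant can be stored at SrcEltBits and recovered by a sign-extending
// load such as VPMOVSXBD/VPMOVSXBQ.
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
using namespace llvm;

static bool fitsSExtTruncation(ArrayRef<APInt> Elts, unsigned SrcEltBits) {
  for (const APInt &Elt : Elts)
    if (Elt.getSignificantBits() > SrcEltBits)
      return false; // e.g. +128 needs 9 signed bits, so it has no i8 form
  return true;
}
```

When the check passes, the stored constant shrinks by the ratio of destination to source element width (e.g. a v4i32 entry whose elements fit in i8 drops from 16 bytes to 4), at the cost of using a sign-extending load instead of a full-width one.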
---
Patch is 6.04 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79815.diff
223 Files Affected:
- (modified) llvm/lib/Target/X86/X86FixupVectorConstants.cpp (+202-85)
- (modified) llvm/lib/Target/X86/X86MCInstLower.cpp (+61-1)
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll (+53-57)
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll (+47-49)
- (modified) llvm/test/CodeGen/X86/avg.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/avx-vperm2x128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/avx2-arith.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll (+154-154)
- (modified) llvm/test/CodeGen/X86/avx2-vector-shifts.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/avx512-arith.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/avx512-intrinsics.ll (+16-16)
- (modified) llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (+143-147)
- (modified) llvm/test/CodeGen/X86/avx512-shuffles/permute.ll (+88-88)
- (modified) llvm/test/CodeGen/X86/avx512bw-intrinsics.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll (+24-24)
- (modified) llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/combine-add.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/combine-addo.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/combine-and.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/combine-bitselect.ll (+11-15)
- (modified) llvm/test/CodeGen/X86/combine-mul.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/combine-pavg.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/combine-pmuldq.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/combine-sdiv.ll (+26-31)
- (modified) llvm/test/CodeGen/X86/combine-shl.ll (+1-2)
- (modified) llvm/test/CodeGen/X86/combine-sra.ll (+3-4)
- (modified) llvm/test/CodeGen/X86/combine-srem.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/combine-srl.ll (+3-6)
- (modified) llvm/test/CodeGen/X86/combine-sub-usat.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/combine-sub.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/combine-udiv.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/combine-urem.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/extract-concat.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/fpclamptosat_vec.ll (+6-10)
- (modified) llvm/test/CodeGen/X86/i64-to-float.ll (+2-4)
- (modified) llvm/test/CodeGen/X86/icmp-abs-C-vec.ll (+14-14)
- (modified) llvm/test/CodeGen/X86/icmp-pow2-diff.ll (+19-9)
- (modified) llvm/test/CodeGen/X86/insert-into-constant-vector.ll (+40-41)
- (modified) llvm/test/CodeGen/X86/insertelement-shuffle.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/known-signbits-vector.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/masked_load.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/masked_store.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll (+81-150)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_usat.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-128.ll (+10-10)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-256.ll (+10-20)
- (modified) llvm/test/CodeGen/X86/min-legal-vector-width.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/movmsk-cmp.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/oddshuffles.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/packus.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/paddus.ll (+92-28)
- (modified) llvm/test/CodeGen/X86/pmul.ll (+72-37)
- (modified) llvm/test/CodeGen/X86/pmulh.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr48215.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr57340.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr61964.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/pr62014.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/pr63507.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr74736.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr77459.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr78109.ll (+2-6)
- (modified) llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/psubus.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/sat-add.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/setcc-non-simple-type.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/sext-vsetcc.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll (+6-11)
- (modified) llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/slow-pmulld.ll (+20-20)
- (modified) llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/srem-vector-lkk.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/sse-domains.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/subvector-broadcast.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/urem-vector-lkk.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/usub_sat_vec.ll (+50-16)
- (modified) llvm/test/CodeGen/X86/var-permute-256.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vec_int_to_fp.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/vec_setcc-2.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/vec_setcc.ll (+42-20)
- (modified) llvm/test/CodeGen/X86/vec_shift6.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vec_smulo.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vec_umulo.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-bo-select.ll (+183-132)
- (modified) llvm/test/CodeGen/X86/vector-fshl-128.ll (+88-90)
- (modified) llvm/test/CodeGen/X86/vector-fshl-256.ll (+14-16)
- (modified) llvm/test/CodeGen/X86/vector-fshl-512.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-128.ll (+57-58)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-256.ll (+12-13)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-fshl-sub128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-fshr-128.ll (+89-91)
- (modified) llvm/test/CodeGen/X86/vector-fshr-256.ll (+13-15)
- (modified) llvm/test/CodeGen/X86/vector-fshr-512.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-128.ll (+57-58)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-256.ll (+12-13)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-fshr-sub128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll (+22-22)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-3.ll (+99-99)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll (+40-40)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-5.ll (+250-259)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-6.ll (+125-125)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll (+243-263)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-8.ll (+140-140)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-2.ll (+52-52)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll (+196-196)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll (+68-68)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll (+404-404)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll (+304-320)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll (+376-434)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll (+196-260)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-2.ll (+72-72)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-3.ll (+216-216)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-5.ll (+332-332)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-6.ll (+184-188)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-7.ll (+180-224)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-8.ll (+64-96)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-2.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-3.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll (+16-16)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll (+84-84)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll (+88-88)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll (+275-302)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-8.ll (+20-20)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-2.ll (+24-24)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-3.ll (+177-177)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll (+105-105)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll (+288-288)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-6.ll (+279-282)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll (+661-683)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-8.ll (+543-543)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-2.ll (+48-48)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-3.ll (+160-160)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-4.ll (+208-208)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll (+179-179)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll (+409-411)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll (+940-956)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll (+848-848)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-2.ll (+64-64)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-3.ll (+208-208)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll (+260-260)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-5.ll (+440-440)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-6.ll (+408-440)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-7.ll (+344-344)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-8.ll (+16-16)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-2.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll (+103-103)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll (+137-137)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll (+69-71)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll (+118-118)
- (modified) llvm/test/CodeGen/X86/vector-mul.ll (+359-203)
- (modified) llvm/test/CodeGen/X86/vector-pack-512.ll (+18-18)
- (modified) llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll (+287-348)
- (modified) llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll (+122-244)
- (modified) llvm/test/CodeGen/X86/vector-reduce-add-mask.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-reduce-ctpop.ll (+2-4)
- (modified) llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll (+327-327)
- (modified) llvm/test/CodeGen/X86/vector-rotate-128.ll (+44-45)
- (modified) llvm/test/CodeGen/X86/vector-rotate-256.ll (+9-11)
- (modified) llvm/test/CodeGen/X86/vector-sext.ll (+71-28)
- (modified) llvm/test/CodeGen/X86/vector-shift-ashr-128.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-shift-ashr-256.ll (+6-8)
- (modified) llvm/test/CodeGen/X86/vector-shift-ashr-512.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-shift-lshr-128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shift-lshr-256.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-shift-shl-128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shift-shl-256.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll (+129-129)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll (+35-35)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll (+19-19)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll (+17-17)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll (+41-41)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll (+61-133)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll (+11-21)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (+9-11)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll (+23-23)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-v1.ll (+31-31)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-v192.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/vector-trunc-math.ll (+10-19)
- (modified) llvm/test/CodeGen/X86/vector-trunc-packus.ll (+55-113)
- (modified) llvm/test/CodeGen/X86/vector-trunc-ssat.ll (+59-106)
- (modified) llvm/test/CodeGen/X86/vector-trunc-usat.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/vector-trunc.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-tzcnt-128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-unsigned-cmp.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-zext.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vselect-constants.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vselect-pcmp.ll (+5-7)
- (modified) llvm/test/CodeGen/X86/vselect.ll (+24-12)
- (modified) llvm/test/CodeGen/X86/widen_arith-5.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/x86-interleaved-access.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg.ll (+24-24)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll (+84-95)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll (+76-86)
``````````diff
diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
index d4af94c7f92ee7e..ee2bb15f6dcbc33 100644
--- a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
+++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
@@ -10,7 +10,7 @@
// replace them with smaller constant pool entries, including:
// * Converting AVX512 memory-fold instructions to their broadcast-fold form
// * Broadcasting of full width loads.
-// * TODO: Sign/Zero extension of full width loads.
+// * TODO: Zero extension of full width loads.
//
//===----------------------------------------------------------------------===//
@@ -216,8 +216,8 @@ static Constant *rebuildConstant(LLVMContext &Ctx, Type *SclTy,
// Attempt to rebuild a normalized splat vector constant of the requested splat
// width, built up of potentially smaller scalar values.
-static Constant *rebuildSplatableConstant(const Constant *C,
- unsigned SplatBitWidth) {
+static Constant *rebuildSplatCst(const Constant *C, unsigned NumElts,
+ unsigned SplatBitWidth) {
std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
if (!Splat)
return nullptr;
@@ -238,8 +238,8 @@ static Constant *rebuildSplatableConstant(const Constant *C,
return rebuildConstant(OriginalType->getContext(), SclTy, *Splat, NumSclBits);
}
-static Constant *rebuildZeroUpperConstant(const Constant *C,
- unsigned ScalarBitWidth) {
+static Constant *rebuildZeroUpperCst(const Constant *C, unsigned NumElts,
+ unsigned ScalarBitWidth) {
Type *Ty = C->getType();
Type *SclTy = Ty->getScalarType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
@@ -265,57 +265,93 @@ static Constant *rebuildZeroUpperConstant(const Constant *C,
return nullptr;
}
-typedef std::function<Constant *(const Constant *, unsigned)> RebuildFn;
+static Constant *rebuildExtCst(const Constant *C, bool IsSExt,
+ unsigned NumElts,
+ unsigned SrcEltBitWidth) {
+ Type *Ty = C->getType();
+ unsigned NumBits = Ty->getPrimitiveSizeInBits();
+ unsigned DstEltBitWidth = NumBits / NumElts;
+ assert((NumBits % NumElts) == 0 && (NumBits % SrcEltBitWidth) == 0 &&
+ (DstEltBitWidth % SrcEltBitWidth) == 0 &&
+ (DstEltBitWidth > SrcEltBitWidth) && "Illegal extension width");
+
+ if (std::optional<APInt> Bits = extractConstantBits(C)) {
+ assert((Bits->getBitWidth() / DstEltBitWidth) == NumElts &&
+ (Bits->getBitWidth() % DstEltBitWidth) == 0 &&
+ "Unexpected constant extension");
+
+ // Ensure every vector element can be represented by the src bitwidth.
+ APInt TruncBits = APInt::getZero(NumElts * SrcEltBitWidth);
+ for (unsigned I = 0; I != NumElts; ++I) {
+ APInt Elt = Bits->extractBits(DstEltBitWidth, I * DstEltBitWidth);
+ if ((IsSExt && Elt.getSignificantBits() > SrcEltBitWidth) ||
+ (!IsSExt && Elt.getActiveBits() > SrcEltBitWidth))
+ return nullptr;
+ TruncBits.insertBits(Elt.trunc(SrcEltBitWidth), I * SrcEltBitWidth);
+ }
+
+ return rebuildConstant(Ty->getContext(), Ty->getScalarType(), TruncBits,
+ SrcEltBitWidth);
+ }
+
+ return nullptr;
+}
+static Constant *rebuildSExtCst(const Constant *C, unsigned NumElts,
+ unsigned SrcEltBitWidth) {
+ return rebuildExtCst(C, true, NumElts, SrcEltBitWidth);
+}
bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
+ bool HasSSE41 = ST->hasSSE41();
bool HasAVX2 = ST->hasAVX2();
bool HasDQI = ST->hasDQI();
bool HasBWI = ST->hasBWI();
bool HasVLX = ST->hasVLX();
- auto FixupConstant =
- [&](unsigned OpBcst256, unsigned OpBcst128, unsigned OpBcst64,
- unsigned OpBcst32, unsigned OpBcst16, unsigned OpBcst8,
- unsigned OpUpper64, unsigned OpUpper32, unsigned OperandNo) {
- assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
- "Unexpected number of operands!");
-
- if (auto *C = X86::getConstantFromPool(MI, OperandNo)) {
- // Attempt to detect a suitable splat/vzload from increasing constant
- // bitwidths.
- // Prefer vzload vs broadcast for same bitwidth to avoid domain flips.
- std::tuple<unsigned, unsigned, RebuildFn> FixupLoad[] = {
- {8, OpBcst8, rebuildSplatableConstant},
- {16, OpBcst16, rebuildSplatableConstant},
- {32, OpUpper32, rebuildZeroUpperConstant},
- {32, OpBcst32, rebuildSplatableConstant},
- {64, OpUpper64, rebuildZeroUpperConstant},
- {64, OpBcst64, rebuildSplatableConstant},
- {128, OpBcst128, rebuildSplatableConstant},
- {256, OpBcst256, rebuildSplatableConstant},
- };
- for (auto [BitWidth, Op, RebuildConstant] : FixupLoad) {
- if (Op) {
- // Construct a suitable constant and adjust the MI to use the new
- // constant pool entry.
- if (Constant *NewCst = RebuildConstant(C, BitWidth)) {
- unsigned NewCPI =
- CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8));
- MI.setDesc(TII->get(Op));
- MI.getOperand(OperandNo + X86::AddrDisp).setIndex(NewCPI);
- return true;
- }
- }
+ struct FixupEntry {
+ int Op;
+ int NumCstElts;
+ int BitWidth;
+ std::function<Constant *(const Constant *, unsigned, unsigned)>
+ RebuildConstant;
+ };
+ auto FixupConstant = [&](ArrayRef<FixupEntry> Fixups, unsigned OperandNo) {
+#ifdef EXPENSIVE_CHECKS
+ assert(llvm::is_sorted(Fixups,
+ [](const FixupEntry &A, const FixupEntry &B) {
+ return (A.NumCstElts * A.BitWidth) <
+ (B.NumCstElts * B.BitWidth);
+ }) &&
+ "Constant fixup table not sorted in ascending constant size");
+#endif
+ assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
+ if (auto *C = X86::getConstantFromPool(MI, OperandNo)) {
+ for (const FixupEntry &Fixup : Fixups) {
+ if (Fixup.Op) {
+ // Construct a suitable constant and adjust the MI to use the new
+ // constant pool entry.
+ if (Constant *NewCst =
+ Fixup.RebuildConstant(C, Fixup.NumCstElts, Fixup.BitWidth)) {
+ unsigned NewCPI =
+ CP->getConstantPoolIndex(NewCst, Align(Fixup.BitWidth / 8));
+ MI.setDesc(TII->get(Fixup.Op));
+ MI.getOperand(OperandNo + X86::AddrDisp).setIndex(NewCPI);
+ return true;
}
}
- return false;
- };
+ }
+ }
+ return false;
+ };
- // Attempt to convert full width vector loads into broadcast/vzload loads.
+ // Attempt to detect a suitable vzload/broadcast/vextload from increasing
+ // constant bitwidths.
+ // Prefer vzload/broadcast/vextload for same bitwidth to avoid domain flips.
switch (Opc) {
/* FP Loads */
case X86::MOVAPDrm:
@@ -323,82 +359,161 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
case X86::MOVUPDrm:
case X86::MOVUPSrm:
// TODO: SSE3 MOVDDUP Handling
- return FixupConstant(0, 0, 0, 0, 0, 0, X86::MOVSDrm, X86::MOVSSrm, 1);
+ return FixupConstant({{X86::MOVSSrm, 1, 32, rebuildZeroUpperCst},
+ {X86::MOVSDrm, 1, 64, rebuildZeroUpperCst}},
+ 1);
case X86::VMOVAPDrm:
case X86::VMOVAPSrm:
case X86::VMOVUPDrm:
case X86::VMOVUPSrm:
- return FixupConstant(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
- X86::VMOVSDrm, X86::VMOVSSrm, 1);
+ return FixupConstant({{X86::VMOVSSrm, 1, 32, rebuildZeroUpperCst},
+ {X86::VBROADCASTSSrm, 1, 32, rebuildSplatCst},
+ {X86::VMOVSDrm, 1, 64, rebuildZeroUpperCst},
+ {X86::VMOVDDUPrm, 1, 64, rebuildSplatCst}},
+ 1);
case X86::VMOVAPDYrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPDYrm:
case X86::VMOVUPSYrm:
- return FixupConstant(0, X86::VBROADCASTF128rm, X86::VBROADCASTSDYrm,
- X86::VBROADCASTSSYrm, 0, 0, 0, 0, 1);
+ return FixupConstant({{X86::VBROADCASTSSYrm, 1, 32, rebuildSplatCst},
+ {X86::VBROADCASTSDYrm, 1, 64, rebuildSplatCst},
+ {X86::VBROADCASTF128rm, 1, 128, rebuildSplatCst}},
+ 1);
case X86::VMOVAPDZ128rm:
case X86::VMOVAPSZ128rm:
case X86::VMOVUPDZ128rm:
case X86::VMOVUPSZ128rm:
- return FixupConstant(0, 0, X86::VMOVDDUPZ128rm, X86::VBROADCASTSSZ128rm, 0,
- 0, X86::VMOVSDZrm, X86::VMOVSSZrm, 1);
+ return FixupConstant({{X86::VMOVSSZrm, 1, 32, rebuildZeroUpperCst},
+ {X86::VBROADCASTSSZ128rm, 1, 32, rebuildSplatCst},
+ {X86::VMOVSDZrm, 1, 64, rebuildZeroUpperCst},
+ {X86::VMOVDDUPZ128rm, 1, 64, rebuildSplatCst}},
+ 1);
case X86::VMOVAPDZ256rm:
case X86::VMOVAPSZ256rm:
case X86::VMOVUPDZ256rm:
case X86::VMOVUPSZ256rm:
- return FixupConstant(0, X86::VBROADCASTF32X4Z256rm, X86::VBROADCASTSDZ256rm,
- X86::VBROADCASTSSZ256rm, 0, 0, 0, 0, 1);
+ return FixupConstant(
+ {{X86::VBROADCASTSSZ256rm, 1, 32, rebuildSplatCst},
+ {X86::VBROADCASTSDZ256rm, 1, 64, rebuildSplatCst},
+ {X86::VBROADCASTF32X4Z256rm, 1, 128, rebuildSplatCst}},
+ 1);
case X86::VMOVAPDZrm:
case X86::VMOVAPSZrm:
case X86::VMOVUPDZrm:
case X86::VMOVUPSZrm:
- return FixupConstant(X86::VBROADCASTF64X4rm, X86::VBROADCASTF32X4rm,
- X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0, 0, 0,
+ return FixupConstant({{X86::VBROADCASTSSZrm, 1, 32, rebuildSplatCst},
+ {X86::VBROADCASTSDZrm, 1, 64, rebuildSplatCst},
+ {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst},
+ {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}},
1);
/* Integer Loads */
case X86::MOVDQArm:
- case X86::MOVDQUrm:
- return FixupConstant(0, 0, 0, 0, 0, 0, X86::MOVQI2PQIrm, X86::MOVDI2PDIrm,
- 1);
+ case X86::MOVDQUrm: {
+ FixupEntry Fixups[] = {
+ {HasSSE41 ? X86::PMOVSXBQrm : 0, 2, 8, rebuildSExtCst},
+ {X86::MOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
+ {HasSSE41 ? X86::PMOVSXBDrm : 0, 4, 8, rebuildSExtCst},
+ {HasSSE41 ? X86::PMOVSXWQrm : 0, 2, 16, rebuildSExtCst},
+ {X86::MOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
+ {HasSSE41 ? X86::PMOVSXBWrm : 0, 8, 8, rebuildSExtCst},
+ {HasSSE41 ? X86::PMOVSXWDrm : 0, 4, 16, rebuildSExtCst},
+ {HasSSE41 ? X86::PMOVSXDQrm : 0, 2, 32, rebuildSExtCst}};
+ return FixupConstant(Fixups, 1);
+ }
case X86::VMOVDQArm:
- case X86::VMOVDQUrm:
- return FixupConstant(0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm,
- HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm,
- HasAVX2 ? X86::VPBROADCASTWrm : 0,
- HasAVX2 ? X86::VPBROADCASTBrm : 0, X86::VMOVQI2PQIrm,
- X86::VMOVDI2PDIrm, 1);
+ case X86::VMOVDQUrm: {
+ FixupEntry Fixups[] = {
+ {HasAVX2 ? X86::VPBROADCASTBrm : 0, 1, 8, rebuildSplatCst},
+ {HasAVX2 ? X86::VPBROADCASTWrm : 0, 1, 16, rebuildSplatCst},
+ {X86::VPMOVSXBQrm, 2, 8, rebuildSExtCst},
+ {X86::VMOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
+ {HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 1, 32,
+ rebuildSplatCst},
+ {X86::VPMOVSXBDrm, 4, 8, rebuildSExtCst},
+ {X86::VPMOVSXWQrm, 2, 16, rebuildSExtCst},
+ {X86::VMOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
+ {HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 1, 64,
+ rebuildSplatCst},
+ {X86::VPMOVSXBWrm, 8, 8, rebuildSExtCst},
+ {X86::VPMOVSXWDrm, 4, 16, rebuildSExtCst},
+ {X86::VPMOVSXDQrm, 2, 32, rebuildSExtCst}};
+ return FixupConstant(Fixups, 1);
+ }
case X86::VMOVDQAYrm:
- case X86::VMOVDQUYrm:
- return FixupConstant(
- 0, HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm,
- HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm,
- HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm,
- HasAVX2 ? X86::VPBROADCASTWYrm : 0, HasAVX2 ? X86::VPBROADCASTBYrm : 0,
- 0, 0, 1);
+ case X86::VMOVDQUYrm: {
+ FixupEntry Fixups[] = {
+ {HasAVX2 ? X86::VPBROADCASTBYrm : 0, 1, 8, rebuildSplatCst},
+ {HasAVX2 ? X86::VPBROADCASTWYrm : 0, 1, 16, rebuildSplatCst},
+ {HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 1, 32,
+ rebuildSplatCst},
+ {HasAVX2 ? X86::VPMOVSXBQYrm : 0, 4, 8, rebuildSExtCst},
+ {HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 1, 64,
+ rebuildSplatCst},
+ {HasAVX2 ? X86::VPMOVSXBDYrm : 0, 8, 8, rebuildSExtCst},
+ {HasAVX2 ? X86::VPMOVSXWQYrm : 0, 4, 16, rebuildSExtCst},
+ {HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 1, 128,
+ rebuildSplatCst},
+ {HasAVX2 ? X86::VPMOVSXBWYrm : 0, 16, 8, rebuildSExtCst},
+ {HasAVX2 ? X86::VPMOVSXWDYrm : 0, 8, 16, rebuildSExtCst},
+ {HasAVX2 ? X86::VPMOVSXDQYrm : 0, 4, 32, rebuildSExtCst}};
+ return FixupConstant(Fixups, 1);
+ }
case X86::VMOVDQA32Z128rm:
case X86::VMOVDQA64Z128rm:
case X86::VMOVDQU32Z128rm:
- case X86::VMOVDQU64Z128rm:
- return FixupConstant(0, 0, X86::VPBROADCASTQZ128rm, X86::VPBROADCASTDZ128rm,
- HasBWI ? X86::VPBROADCASTWZ128rm : 0,
- HasBWI ? X86::VPBROADCASTBZ128rm : 0,
- X86::VMOVQI2PQIZrm, X86::VMOVDI2PDIZrm, 1);
+ case X86::VMOVDQU64Z128rm: {
+ FixupEntry Fixups[] = {
+ {HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1, 8, rebuildSplatCst},
+ {HasBWI ? X86::VPBROADCASTWZ128rm : 0, 1, 16, rebuildSplatCst},
+ {X86::VPMOVSXBQZ128rm, 2, 8, rebuildSExtCst},
+ {X86::VMOVDI2PDIZrm, 1, 32, rebuildZeroUpperCst},
+ {X86::VPBROADCASTDZ128rm, 1, 32, rebuildSplatCst},
+ {X86::VPMOVSXBDZ128rm, 4, 8, rebuildSExtCst},
+ {X86::VPMOVSXWQZ128rm, 2, 16, rebuildSExtCst},
+ {X86::VMOVQI2PQIZrm, 1, 64, rebuildZeroUpperCst},
+ {X86::VPBROADCASTQZ128rm, 1, 64, rebuildSplatCst},
+ {HasBWI ? X86::VPMOVSXBWZ128rm : 0, 8, 8, rebuildSExtCst},
+ {X86::VPMOVSXWDZ128rm, 4, 16, rebuildSExtCst},
+ {X86::VPMOVSXDQZ128rm, 2, 32, rebuildSExtCst}};
+ return FixupConstant(Fixups, 1);
+ }
case X86::VMOVDQA32Z256rm:
case X86::VMOVDQA64Z256rm:
case X86::VMOVDQU32Z256rm:
- case X86::VMOVDQU64Z256rm:
- return FixupConstant(0, X86::VBROADCASTI32X4Z256rm, X86::VPBROADCASTQZ256rm,
- X86::VPBROADCASTDZ256rm,
- HasBWI ? X86::VPBROADCASTWZ256rm : 0,
- HasBWI ? X86::VPBROADCASTBZ256rm : 0, 0, 0, 1);
+ case X86::VMOVDQU64Z256rm: {
+ FixupEntry Fixups[] = {
+ {HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1, 8, rebuildSplatCst},
+ {HasBWI ? X86::VPBROADCASTWZ256rm : 0, 1, 16, rebuildSplatCst},
+ {X86::VPBROADCASTDZ256rm, 1, 32, rebuildSplatCst},
+ {X86::VPMOVSXBQZ256rm, 4, 8, rebuildSExtCst},
+ {X86::VPBROADCASTQZ256rm, 1, 64, rebuildSplatCst},
+ {X86::VPMOVSXBDZ256rm, 8, 8, rebuildSExtCst},
+ {X86::VPMOVSXWQZ256rm, 4, 16, rebuildSExtCst},
+ {X86::VBROADCASTI32X4Z256rm, 1, 128, rebuildSplatCst},
+ {HasBWI ? X86::VPMOVSXBWZ256rm : 0, 16, 8, rebuildSExtCst},
+ {X86::VPMOVSXWDZ256rm, 8, 16, rebuildSExtCst},
+ {X86::VPMOVSXDQZ256rm, 4, 32, rebuildSExtCst}};
+ return FixupConstant(Fixups, 1);
+ }
case X86::VMOVDQA32Zrm:
case X86::VMOVDQA64Zrm:
case X86::VMOVDQU32Zrm:
- case X86::VMOVDQU64Zrm:
- return FixupConstant(X86::VBROADCASTI64X4rm, X86::VBROADCASTI32X4rm,
- X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
- HasBWI ? X86::VPBROADCASTWZrm : 0,
- HasBWI ? X86::VPBROADCASTBZrm : 0, 0, 0, 1);
+ case X86::VMOVDQU64Zrm: {
+ FixupEntry Fixups[] = {
+ {HasBWI ? X86::VPBROADCASTBZrm : 0, 1, 8, rebuildSplatCst},
+ {HasBWI ? X86::VPBROADCASTWZrm : 0, 1, 16, rebuildSplatCst},
+ {X86::VPBROADCASTDZrm, 1, 32, rebuildSplatCst},
+ {X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst},
+ {X86::VPMOVSXBQZrm, 8, 8, rebuildSExtCst},
+ {X86::VBROADCASTI32X4rm, 1, 128, rebuildSplatCst},
+ {X86::VPMOVSXBDZrm, 16, 8, rebuildSExtCst},
+ {X86::VPMOVSXWQZrm, 8, 16, rebuildSExtCst},
+ {X86::VBROADCASTI64X4rm, 1, 256, rebuildSplatCst},
+ {HasBWI ? X86::VPMOVSXBWZrm : 0, 32, 8, rebuildSExtCst},
+ {X86::VPMOVSXWDZrm, 16, 16, rebuildSExtCst},
+ {X86::VPMOVSXDQZrm, 8, 32, rebuildSExtCst}};
+ return FixupConstant(Fixups, 1);
+ }
}
auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
@@ -423,7 +538,9 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
if (OpBcst32 || OpBcst64) {
unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
- return FixupConstant(0, 0, OpBcst64, OpBcst32, 0, 0, 0, 0, OpNo);
+ FixupEntry Fixups[] = {{(int)OpBcst32, 32, 32, rebuildSplatCst},
+ {(int)OpBcst64, 64, 64, rebuildSplatCst}};
+ return FixupConstant(Fixups, OpNo);
}
return false;
};
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 58ebe023cd61eca..55d7868c530536f 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1582,6 +1582,36 @@ static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
}
}
+static bool printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
+ int SrcEltBits, int DstEltBits) {
+ auto *C = X86::getConstantFromPool(*MI, 1);
+ if (C && C->getType()->getScalarSizeInBits() == SrcEltBits) {
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ int NumElts = CDS->getNumElements();
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+
+ const MachineOperand &DstOp = MI->getOperand(0);
+ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+ CS << "[";
+ for (int i = 0; i != NumElts; ++i) {
+ if (i != 0)
+ CS << ",";
+ if (CDS->getElementType()->isIntegerTy()) {
+ APInt Elt = CDS->getElementAsAPInt(i).sext(DstEltBits);
+ printConstant(Elt, CS);
+ } else
+ CS << "?";
+ }
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
+ return true;
+ }
+ }
+
+ return false;
+}
+
void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) &&
@@ -1844,7 +1874,7 @@ static void addConstantComments(const MachineInstr *MI,
case X86::VMOVQI2PQIrm:
case X86::VMOVQI2PQIZrm:
printZeroUpperMove(MI, OutStreamer, 64, 128, "mem[0],zero");
- break;
+ break;
case X86::MOVSSrm:
case X86::VMOVSSrm:
@@ -1979,6 +2009,36 @@ static void addConstantComments(const MachineInstr *MI,
case X86::VPBROADCASTBZrm:
printBroadcast(MI, OutStreamer, 64, 8);
break;
+
+#define MOVX_CASE(Prefix, Ext, Type, Suffix) \
+ case X86::Prefix##PMOV##Ext##Type##Suffix##rm:
+
+#define CASE_MOVX_RM(Ext, Type) \
+ MOVX_CASE(, Ex...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/79815