[llvm] [msan] Handle AVX512 vector down convert (non-mem) intrinsics (PR #147606)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 15:54:06 PDT 2025
================
@@ -4592,6 +4592,91 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
ConstantInt::get(IRB.getInt32Ty(), 0));
}
+ // Handle llvm.x86.avx512.mask.pmov{,s,us}.*.512
+ //
+ // e.g., call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512
+ // (<8 x i64>, <16 x i8>, i8)
+ // A WriteThru Mask
+ //
+ // call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512
+ // (<16 x i32>, <16 x i8>, i16)
+ //
+ // Dst[i] = Mask[i] ? truncate_or_saturate(A[i]) : WriteThru[i]
+ // Dst_shadow[i] = Mask[i] ? truncate(A_shadow[i]) : WriteThru_shadow[i]
+ //
+ // If Dst has more elements than A, the excess elements are zeroed (and the
+ // corresponding shadow is initialized).
+ //
+ // Note: for PMOV (truncation), handleIntrinsicByApplyingToShadow is precise
+ // and is much faster than this handler.
+ void handleAVX512VectorDownConvert(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+
+ assert(I.arg_size() == 3);
+ Value *A = I.getOperand(0);
+ Value *WriteThrough = I.getOperand(1);
+ Value *Mask = I.getOperand(2);
+
+ assert(isa<FixedVectorType>(A->getType()));
+ assert(A->getType()->isIntOrIntVectorTy());
+
+ assert(isa<FixedVectorType>(WriteThrough->getType()));
+ assert(WriteThrough->getType()->isIntOrIntVectorTy());
+
+ unsigned ANumElements =
+ cast<FixedVectorType>(A->getType())->getNumElements();
+ unsigned OutputNumElements =
+ cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
+ assert(ANumElements == OutputNumElements ||
+ ANumElements * 2 == OutputNumElements);
+
+ assert(Mask->getType()->isIntegerTy());
+ assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
+
+ assert(I.getType() == WriteThrough->getType());
+
+ // Widen the mask, if necessary, to have one bit per element of the output
+ // vector.
+ // We want the extra bits to have '1's, so that the CreateSelect will
+ // select the values from AShadow instead of WriteThroughShadow ("maskless"
+ // versions of the intrinsics are sometimes implemented using an all-1's
+ // mask and an undefined value for WriteThroughShadow). We accomplish this
+ // by using bitwise NOT before and after the ZExt.
+ if (ANumElements != OutputNumElements) {
+ Mask = IRB.CreateNot(Mask);
+ Mask = IRB.CreateZExt(Mask, Type::getIntNTy(*MS.C, OutputNumElements),
+ "_ms_widen_mask");
+ Mask = IRB.CreateNot(Mask);
+ }
+ Mask = IRB.CreateBitCast(
+ Mask, FixedVectorType::get(IRB.getInt1Ty(), OutputNumElements));
+
+ Value *AShadow = getShadow(A);
+
+ // The return type might have more elements than the input.
+ // Temporarily shrink the return type's number of elements.
+ VectorType *ShadowType = maybeShrinkVectorShadowType(A, I);
+
+ // PMOV truncates; PMOVS/PMOVUS uses signed/unsigned saturation.
+ // This handler treats them all as truncation, which leads to some rare
+ // false positives in the cases where the truncated bytes could
+ // unambiguously saturate the value e.g., if A = ??????10 ????????
+ // (big-endian), the unsigned saturated byte conversion is 11111111 i.e.,
+ // fully defined, but the truncated byte is ????????.
+ //
+ // TODO: use GetMinMaxUnsigned() to handle saturation precisely.
+ AShadow = IRB.CreateTrunc(AShadow, ShadowType, "_ms_trunc_shadow");
----------------
thurstond wrote:
Reduced, but leaving enough to honor our SpaceX colleagues
https://github.com/llvm/llvm-project/pull/147606
More information about the llvm-commits mailing list