[clang] [llvm] [msan] Implement support for Arm NEON vst{2,3,4} instructions (PR #99360)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 17 13:35:27 PDT 2024
================
@@ -3742,6 +3751,124 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ /// Interleaves two equally sized shadow vectors element by element.
+ ///
+ /// Given two shadows AAAA..., BBBB..., returns the interleaved value
+ /// ABABABAB ...
+ ///
+ /// \param IRB   builder used to emit the shufflevector.
+ /// \param left  shadow A; asserted to be a fixed vector of Width elements.
+ /// \param right shadow B; asserted to be a fixed vector of Width elements.
+ /// \param Width number of elements in A == number of elements in B.
+ /// \returns the shufflevector value holding the 2 * Width interleaved
+ ///          elements.
+ Value *interleaveAB(IRBuilder<> &IRB, Value *left, Value *right,
+                     unsigned Width) {
+   assert(isa<FixedVectorType>(left->getType()));
+   assert(isa<FixedVectorType>(right->getType()));
+   assert(cast<FixedVectorType>(left->getType())->getNumElements() == Width);
+   assert(cast<FixedVectorType>(right->getType())->getNumElements() == Width);
+
+   // In a shuffle mask, indices [0, Width) select from the first operand and
+   // indices [Width, 2 * Width) select from the second operand.
+   SmallVector<int> Mask;
+   Mask.reserve(2 * Width);
+
+   for (unsigned i = 0; i < Width; i++) {
+     Mask.push_back(static_cast<int>(i));         // A[i]
+     Mask.push_back(static_cast<int>(i + Width)); // B[i]
+   }
+
+   return IRB.CreateShuffleVector(left, right, Mask);
+ }
+
+ /// Completes a three-way interleave from two partially interleaved shadows.
+ ///
+ /// Given three shadows, which are already interleaved into two shadows
+ /// ABABABAB and CxCxCxCx (x is undef), returns the interleaved value
+ /// ABCABCABC.
+ ///
+ /// \param IRB   builder used to emit the shufflevector.
+ /// \param left  the AB interleaving; asserted to be a fixed vector of
+ ///              2 * Width elements.
+ /// \param right the Cx interleaving; asserted to be a fixed vector of
+ ///              2 * Width elements.
+ /// \param Width the width of a single shadow (e.g., A), i.e.
+ ///              number of elements in A == number of elements in B
+ ///              == number of elements in C.
+ /// \returns the shufflevector value holding the 3 * Width interleaved
+ ///          elements.
+ Value *interleaveABCx(IRBuilder<> &IRB, Value *left, Value *right,
+                       unsigned Width) {
+   assert(isa<FixedVectorType>(left->getType()));
+   assert(isa<FixedVectorType>(right->getType()));
+   assert(cast<FixedVectorType>(left->getType())->getNumElements() ==
+          2 * Width);
+   assert(cast<FixedVectorType>(right->getType())->getNumElements() ==
+          2 * Width);
+
+   // Both operands have 2 * Width elements, so shuffle mask indices
+   // [0, 2 * Width) select from left (AB) and indices
+   // [2 * Width, 4 * Width) select from right (Cx).
+   const unsigned RightBase = 2 * Width;
+
+   SmallVector<int> Mask;
+   Mask.reserve(3 * Width);
+
+   for (unsigned i = 0; i < 2 * Width; i += 2) {
+     Mask.push_back(static_cast<int>(i));             // A[i / 2]
+     Mask.push_back(static_cast<int>(i + 1));         // B[i / 2]
+     Mask.push_back(static_cast<int>(RightBase + i)); // C[i / 2]
+     // Index (RightBase + i + 1) contains Undef; don't copy
+   }
+
+   return IRB.CreateShuffleVector(left, right, Mask);
+ }
+
+ /// Calculates the shadow for interleaving 2, 3 or 4 vectors
+ /// (e.g., for Arm NEON vector store).
+ Value *interleaveShadow(IRBuilder<> &IRB, IntrinsicInst &I) {
----------------
thurstond wrote:
Good idea! Done in https://github.com/llvm/llvm-project/pull/99360/commits/41d3a53f770bd78cc5ab7325ac1f8f62437dd73c
https://github.com/llvm/llvm-project/pull/99360
More information about the llvm-commits
mailing list