[clang] [llvm] [msan] Implement support for Arm NEON vst{2,3,4} instructions (PR #99360)

Wed Jul 17 12:50:38 PDT 2024

================
@@ -3865,6 +3866,125 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  /// Interleaves two equally sized fixed-vector shadows element by element.
+  ///
+  /// Given two shadows AAAA..., BBBB..., emits a shufflevector that yields
+  /// the interleaved value ABABABAB ...
+  ///
+  /// \param IRB   Builder used to emit the shuffle.
+  /// \param left  Shadow supplying the A elements (even result positions).
+  /// \param right Shadow supplying the B elements (odd result positions).
+  /// \param Width Number of elements in \p left; must equal the number of
+  ///              elements in \p right.
+  /// \returns The shuffle result; its mask has 2 * Width entries.
+  Value *interleaveAB(IRBuilder<> &IRB, Value *left, Value *right,
+                      unsigned Width) {
+    assert(isa<FixedVectorType>(left->getType()));
+    assert(isa<FixedVectorType>(right->getType()));
+    assert(cast<FixedVectorType>(left->getType())->getNumElements() == Width);
+    assert(cast<FixedVectorType>(right->getType())->getNumElements() == Width);
+
+    // Shuffle indices [0, Width) select from left, and indices
+    // [Width, 2 * Width) select from right.
+    SmallVector<int, 16> Mask;
+    Mask.reserve(2 * static_cast<size_t>(Width));
+
+    for (unsigned i = 0; i < Width; ++i) {
+      Mask.push_back(static_cast<int>(i));
+      Mask.push_back(static_cast<int>(i + Width));
+    }
+
+    return IRB.CreateShuffleVector(left, right, Mask);
+  }
+
+  /// Merges two pre-interleaved shadows into a three-way interleaving.
+  ///
+  /// Given three shadows, which are already interleaved into two shadows
+  /// ABABABAB and CxCxCxCx (x is undef), return the interleaved value
+  /// ABCABCABC.
+  ///
+  /// \param IRB   Builder used to emit the shuffle.
+  /// \param left  The AB interleaving; must have 2 * Width elements.
+  /// \param right The Cx interleaving; must have 2 * Width elements.
+  /// \param Width Number of elements in a single shadow, i.e.
+  ///              Width == number of elements in A == number of elements
+  ///              in B == number of elements in C.
+  /// \returns The shuffle result; its mask has 3 * Width entries.
+  Value *interleaveABCx(IRBuilder<> &IRB, Value *left, Value *right,
+                        unsigned Width) {
+    // Width parameter is the width of a single shadow (e.g., A).
+    // The width of AB (or Cx) is Width * 2.
+    const unsigned PairWidth = 2 * Width;
+
+    assert(isa<FixedVectorType>(left->getType()));
+    assert(isa<FixedVectorType>(right->getType()));
+    assert(cast<FixedVectorType>(left->getType())->getNumElements() ==
+           PairWidth);
+    assert(cast<FixedVectorType>(right->getType())->getNumElements() ==
+           PairWidth);
+
+    SmallVector<int, 16> Mask;
+    Mask.reserve(3 * static_cast<size_t>(Width));
+
+    // Both shuffle inputs have PairWidth elements, so shuffle indices
+    // [0, PairWidth) select from left (AB) and indices
+    // [PairWidth, 2 * PairWidth) select from right (Cx). The C element that
+    // belongs with the A/B pair at left[i], left[i + 1] is right[i], i.e.
+    // shuffle index PairWidth + i (not Width + i, which would still point
+    // into left for small i).
+    for (unsigned i = 0; i < PairWidth; i += 2) {
+      Mask.push_back(static_cast<int>(i));             // A
+      Mask.push_back(static_cast<int>(i + 1));         // B
+      Mask.push_back(static_cast<int>(PairWidth + i)); // C
+      // Index (PairWidth + i + 1) contains Undef; don't copy
+    }
+
+    return IRB.CreateShuffleVector(left, right, Mask);
+  }
+
+  /// Calculates the shadow for interleaving 2, 3 or 4 vectors
+  /// (e.g., for Arm NEON vector store).
+  ///
+  /// Every argument of \p I except the last is asserted to be a fixed
+  /// vector; the last argument is asserted to be a pointer (the destination).
+  ///
+  /// \param IRB Builder used to emit the shuffles.
+  /// \param I   The intrinsic call whose vector arguments are interleaved.
+  /// \returns The interleaved shadow of the vector arguments, or nullptr
+  ///          (after asserting, in builds with assertions enabled) if the
+  ///          number of vector arguments is not 2, 3 or 4.
+  Value *interleaveShadow(IRBuilder<> &IRB, IntrinsicInst &I) {
+    // Don't use getNumOperands() because it includes the callee
+    const unsigned NumArgs = I.arg_size();
+    assert(NumArgs >= 1);
+
+    // The last arg operand is the output; all preceding ones are vectors.
+    const unsigned NumVectors = NumArgs - 1;
+
+    for (unsigned i = 0; i < NumVectors; ++i)
+      assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
+
+    // Last operand is the destination
+    assert(isa<PointerType>(I.getArgOperand(NumArgs - 1)->getType()));
+
+    // Validate the vector count before touching argument 0, so that an
+    // unsupported call never has its pointer argument treated as a vector.
+    assert((NumVectors >= 2) && (NumVectors <= 4));
+    if (NumVectors < 2 || NumVectors > 4)
+      return nullptr;
+
+    // Keep the element count at full width; storing it in a 16-bit integer
+    // would silently truncate large counts.
+    const unsigned Width =
+        cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
+
+    // Fetch the shadows in separate statements so the calls happen in a
+    // fixed order (the evaluation order of function arguments is
+    // unspecified).
+    Value *ShadowA = getShadow(&I, 0);
+    Value *ShadowB = getShadow(&I, 1);
+    Value *AB = interleaveAB(IRB, ShadowA, ShadowB, Width);
+    if (NumVectors == 2)
+      return AB;
+
+    Value *ShadowC = getShadow(&I, 2);
+    if (NumVectors == 3) {
+      // Pad C with undef lanes to form CxCx..., then merge it with ABAB...
+      Value *UndefV = UndefValue::get(ShadowA->getType());
+      Value *Cx = interleaveAB(IRB, ShadowC, UndefV, Width);
+      return interleaveABCx(IRB, AB, Cx, Width);
+    }
+
+    // NumVectors == 4: interleave the AB and CD pairs as units twice as wide.
+    Value *ShadowD = getShadow(&I, 3);
+    Value *CD = interleaveAB(IRB, ShadowC, ShadowD, Width);
+    return interleaveAB(IRB, AB, CD, Width * 2);
+  }
+
+  /// Handle Arm NEON vector store intrinsics (vst{2,3,4}).
+  ///
+  /// Arm NEON vector store intrinsics have the output address (pointer) as the
+  /// last argument, with the initial arguments being the inputs. They return
+  /// void.
+  void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *interleavedShadow = interleaveShadow(IRB, I);
+
+    // Don't use getNumOperands() because it includes the callee
+    int numArgOperands = I.arg_size();
+    assert(numArgOperands >= 1);
+
+    // The last arg operand is the output
+    Value *Addr = I.getArgOperand(numArgOperands - 1);
+
+    Value *ShadowPtr, *OriginPtr;
+    std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
+        Addr, IRB, interleavedShadow->getType(), Align(1), /*isStore*/ true);
+    IRB.CreateAlignedStore(interleavedShadow, ShadowPtr, Align(1));
+
+    if (MS.TrackOrigins) {
+      OriginCombiner OC(this, IRB);
+      for (int i = 0; i < numArgOperands - 1; i++)
+        OC.Add(I.getOperand(i));
----------------
vitalybuka wrote:

Please iterate over args, not operands

https://github.com/llvm/llvm-project/pull/99360