[llvm] d9cc5d8 - [AArch64][SVE] Combine bitcasts of predicate types with vector inserts/extracts of loads/stores
Bradley Smith via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 4 08:57:09 PDT 2021
Author: Bradley Smith
Date: 2021-08-04T15:51:14Z
New Revision: d9cc5d84e4d3bf45df1ef87e677e3ec1431b59b5
URL: https://github.com/llvm/llvm-project/commit/d9cc5d84e4d3bf45df1ef87e677e3ec1431b59b5
DIFF: https://github.com/llvm/llvm-project/commit/d9cc5d84e4d3bf45df1ef87e677e3ec1431b59b5.diff
LOG: [AArch64][SVE] Combine bitcasts of predicate types with vector inserts/extracts of loads/stores
An insert subvector that inserts the result of a predicate-vector-sized load
into undef at index 0, and whose result is cast to a predicate type, can be
combined into a direct predicate load. Likewise, the same applies to extract
subvector, but in reverse.
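For illustration, a minimal IR sketch of the load direction, mirroring the new
pred_load_v2i8 test below (with vscale_range(1,1)):

  ; before the combine
  %load   = load <2 x i8>, <2 x i8>* %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret    = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>

  ; after the combine
  %1 = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
  %2 = load <vscale x 16 x i1>, <vscale x 16 x i1>* %1

The store direction is symmetric, turning an extract subvector fed by a
predicate bitcast and stored to memory into a direct predicate store.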
The purpose of this optimization is to clean up cases that will be introduced
in a later patch, where casts between predicate types and i8 types will use
insert subvector rather than going through memory early.
This optimization is done in SVEIntrinsicOpts rather than InstCombine so that
scalable loads are re-introduced as late as possible, giving other
optimizations the best possible chance to do a good job.
Differential Revision: https://reviews.llvm.org/D106549
Added:
llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
Modified:
llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 79dcca8f8458..0f12b63df6ea 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -59,6 +59,10 @@ struct SVEIntrinsicOpts : public ModulePass {
bool coalescePTrueIntrinsicCalls(BasicBlock &BB,
SmallSetVector<IntrinsicInst *, 4> &PTrues);
bool optimizePTrueIntrinsicCalls(SmallSetVector<Function *, 4> &Functions);
+ bool optimizePredicateStore(Instruction *I);
+ bool optimizePredicateLoad(Instruction *I);
+
+ bool optimizeInstructions(SmallSetVector<Function *, 4> &Functions);
/// Operates at the function-scope. I.e., optimizations are applied local to
/// the functions themselves.
@@ -276,11 +280,166 @@ bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
return Changed;
}
+// This is done in SVEIntrinsicOpts rather than InstCombine so that we introduce
+// scalable stores as late as possible
+bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {
+ auto *F = I->getFunction();
+ auto Attr = F->getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return false;
+
+ unsigned MinVScale, MaxVScale;
+ std::tie(MinVScale, MaxVScale) = Attr.getVScaleRangeArgs();
+ // The transform needs to know the exact runtime length of scalable vectors
+ if (MinVScale != MaxVScale || MinVScale == 0)
+ return false;
+
+ auto *PredType =
+ ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
+ auto *FixedPredType =
+ FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);
+
+ // If we have a store..
+ auto *Store = dyn_cast<StoreInst>(I);
+ if (!Store || !Store->isSimple())
+ return false;
+
+ // ..that is storing a predicate vector sized worth of bits..
+ if (Store->getOperand(0)->getType() != FixedPredType)
+ return false;
+
+ // ..where the value stored comes from a vector extract..
+ auto *IntrI = dyn_cast<IntrinsicInst>(Store->getOperand(0));
+ if (!IntrI ||
+ IntrI->getIntrinsicID() != Intrinsic::experimental_vector_extract)
+ return false;
+
+ // ..that is extracting from index 0..
+ if (!cast<ConstantInt>(IntrI->getOperand(1))->isZero())
+ return false;
+
+  // ..where the value being extracted from comes from a bitcast
+ auto *BitCast = dyn_cast<BitCastInst>(IntrI->getOperand(0));
+ if (!BitCast)
+ return false;
+
+ // ..and the bitcast is casting from predicate type
+ if (BitCast->getOperand(0)->getType() != PredType)
+ return false;
+
+ IRBuilder<> Builder(I->getContext());
+ Builder.SetInsertPoint(I);
+
+ auto *PtrBitCast = Builder.CreateBitCast(
+ Store->getPointerOperand(),
+ PredType->getPointerTo(Store->getPointerAddressSpace()));
+ Builder.CreateStore(BitCast->getOperand(0), PtrBitCast);
+
+ Store->eraseFromParent();
+ if (IntrI->getNumUses() == 0)
+ IntrI->eraseFromParent();
+ if (BitCast->getNumUses() == 0)
+ BitCast->eraseFromParent();
+
+ return true;
+}
+
+// This is done in SVEIntrinsicOpts rather than InstCombine so that we introduce
+// scalable loads as late as possible
+bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {
+ auto *F = I->getFunction();
+ auto Attr = F->getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return false;
+
+ unsigned MinVScale, MaxVScale;
+ std::tie(MinVScale, MaxVScale) = Attr.getVScaleRangeArgs();
+ // The transform needs to know the exact runtime length of scalable vectors
+ if (MinVScale != MaxVScale || MinVScale == 0)
+ return false;
+
+ auto *PredType =
+ ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
+ auto *FixedPredType =
+ FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);
+
+ // If we have a bitcast..
+ auto *BitCast = dyn_cast<BitCastInst>(I);
+ if (!BitCast || BitCast->getType() != PredType)
+ return false;
+
+ // ..whose operand is a vector_insert..
+ auto *IntrI = dyn_cast<IntrinsicInst>(BitCast->getOperand(0));
+ if (!IntrI ||
+ IntrI->getIntrinsicID() != Intrinsic::experimental_vector_insert)
+ return false;
+
+ // ..that is inserting into index zero of an undef vector..
+ if (!isa<UndefValue>(IntrI->getOperand(0)) ||
+ !cast<ConstantInt>(IntrI->getOperand(2))->isZero())
+ return false;
+
+ // ..where the value inserted comes from a load..
+ auto *Load = dyn_cast<LoadInst>(IntrI->getOperand(1));
+ if (!Load || !Load->isSimple())
+ return false;
+
+ // ..that is loading a predicate vector sized worth of bits..
+ if (Load->getType() != FixedPredType)
+ return false;
+
+ IRBuilder<> Builder(I->getContext());
+ Builder.SetInsertPoint(Load);
+
+ auto *PtrBitCast = Builder.CreateBitCast(
+ Load->getPointerOperand(),
+ PredType->getPointerTo(Load->getPointerAddressSpace()));
+ auto *LoadPred = Builder.CreateLoad(PredType, PtrBitCast);
+
+ BitCast->replaceAllUsesWith(LoadPred);
+ BitCast->eraseFromParent();
+ if (IntrI->getNumUses() == 0)
+ IntrI->eraseFromParent();
+ if (Load->getNumUses() == 0)
+ Load->eraseFromParent();
+
+ return true;
+}
+
+bool SVEIntrinsicOpts::optimizeInstructions(
+ SmallSetVector<Function *, 4> &Functions) {
+ bool Changed = false;
+
+ for (auto *F : Functions) {
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
+
+ // Traverse the DT with an rpo walk so we see defs before uses, allowing
+ // simplification to be done incrementally.
+ BasicBlock *Root = DT->getRoot();
+ ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
+ for (auto *BB : RPOT) {
+ for (Instruction &I : make_early_inc_range(*BB)) {
+ switch (I.getOpcode()) {
+ case Instruction::Store:
+ Changed |= optimizePredicateStore(&I);
+ break;
+ case Instruction::BitCast:
+ Changed |= optimizePredicateLoad(&I);
+ break;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
bool SVEIntrinsicOpts::optimizeFunctions(
SmallSetVector<Function *, 4> &Functions) {
bool Changed = false;
Changed |= optimizePTrueIntrinsicCalls(Functions);
+ Changed |= optimizeInstructions(Functions);
return Changed;
}
@@ -297,6 +456,8 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) {
continue;
switch (F.getIntrinsicID()) {
+ case Intrinsic::experimental_vector_extract:
+ case Intrinsic::experimental_vector_insert:
case Intrinsic::aarch64_sve_ptrue:
for (User *U : F.users())
Functions.insert(cast<Instruction>(U)->getFunction());
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
new file mode 100644
index 000000000000..40648755ae18
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
@@ -0,0 +1,86 @@
+; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @pred_store_v2i8(<vscale x 16 x i1> %pred, <2 x i8>* %addr) #0 {
+; CHECK-LABEL: @pred_store_v2i8(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
+; CHECK-NEXT: store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT: ret void
+ %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
+ %extract = tail call <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
+ store <2 x i8> %extract, <2 x i8>* %addr, align 4
+ ret void
+}
+
+define void @pred_store_v4i8(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
+; CHECK-LABEL: @pred_store_v4i8(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8>* %addr to <vscale x 16 x i1>*
+; CHECK-NEXT: store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT: ret void
+ %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
+ %extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
+ store <4 x i8> %extract, <4 x i8>* %addr, align 4
+ ret void
+}
+
+define void @pred_store_v8i8(<vscale x 16 x i1> %pred, <8 x i8>* %addr) #2 {
+; CHECK-LABEL: @pred_store_v8i8(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* %addr to <vscale x 16 x i1>*
+; CHECK-NEXT: store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT: ret void
+ %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
+ %extract = tail call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
+ store <8 x i8> %extract, <8 x i8>* %addr, align 4
+ ret void
+}
+
+
+; Check that too small of a vscale prevents optimization
+define void @pred_store_neg1(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #0 {
+; CHECK-LABEL: @pred_store_neg1(
+; CHECK: call <4 x i8> @llvm.experimental.vector.extract
+ %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
+ %extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
+ store <4 x i8> %extract, <4 x i8>* %addr, align 4
+ ret void
+}
+
+; Check that too large of a vscale prevents optimization
+define void @pred_store_neg2(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #2 {
+; CHECK-LABEL: @pred_store_neg2(
+; CHECK: call <4 x i8> @llvm.experimental.vector.extract
+ %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
+ %extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
+ store <4 x i8> %extract, <4 x i8>* %addr, align 4
+ ret void
+}
+
+; Check that a non-zero index prevents optimization
+define void @pred_store_neg3(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
+; CHECK-LABEL: @pred_store_neg3(
+; CHECK: call <4 x i8> @llvm.experimental.vector.extract
+ %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
+ %extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 4)
+ store <4 x i8> %extract, <4 x i8>* %addr, align 4
+ ret void
+}
+
+; Check that differing vscale min/max prevents optimization
+define void @pred_store_neg4(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #3 {
+; CHECK-LABEL: @pred_store_neg4(
+; CHECK: call <4 x i8> @llvm.experimental.vector.extract
+ %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
+ %extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
+ store <4 x i8> %extract, <4 x i8>* %addr, align 4
+ ret void
+}
+
+declare <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8>, i64)
+declare <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8>, i64)
+declare <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8>, i64)
+
+attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
+attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
+attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
+attributes #3 = { "target-features"="+sve" vscale_range(2,4) }
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
new file mode 100644
index 000000000000..7f358fe6d14e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
@@ -0,0 +1,114 @@
+; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 16 x i1> @pred_load_v2i8(<2 x i8>* %addr) #0 {
+; CHECK-LABEL: @pred_load_v2i8(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+ %load = load <2 x i8>, <2 x i8>* %addr, align 4
+ %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
+ %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
+ ret <vscale x 16 x i1> %ret
+}
+
+define <vscale x 16 x i1> @pred_load_v4i8(<4 x i8>* %addr) #1 {
+; CHECK-LABEL: @pred_load_v4i8(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8>* %addr to <vscale x 16 x i1>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+ %load = load <4 x i8>, <4 x i8>* %addr, align 4
+ %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
+ %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
+ ret <vscale x 16 x i1> %ret
+}
+
+define <vscale x 16 x i1> @pred_load_v8i8(<8 x i8>* %addr) #2 {
+; CHECK-LABEL: @pred_load_v8i8(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* %addr to <vscale x 16 x i1>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+ %load = load <8 x i8>, <8 x i8>* %addr, align 4
+ %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> %load, i64 0)
+ %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
+ ret <vscale x 16 x i1> %ret
+}
+
+; Ensure the insertion point is at the load
+define <vscale x 16 x i1> @pred_load_insertion_point(<2 x i8>* %addr) #0 {
+; CHECK-LABEL: @pred_load_insertion_point(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT: br label %bb1
+; CHECK: bb1:
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+entry:
+ %load = load <2 x i8>, <2 x i8>* %addr, align 4
+ br label %bb1
+
+bb1:
+ %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
+ %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
+ ret <vscale x 16 x i1> %ret
+}
+
+; Check that too small of a vscale prevents optimization
+define <vscale x 16 x i1> @pred_load_neg1(<4 x i8>* %addr) #0 {
+; CHECK-LABEL: @pred_load_neg1(
+; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
+ %load = load <4 x i8>, <4 x i8>* %addr, align 4
+ %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
+ %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
+ ret <vscale x 16 x i1> %ret
+}
+
+; Check that too large of a vscale prevents optimization
+define <vscale x 16 x i1> @pred_load_neg2(<4 x i8>* %addr) #2 {
+; CHECK-LABEL: @pred_load_neg2(
+; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
+ %load = load <4 x i8>, <4 x i8>* %addr, align 4
+ %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
+ %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
+ ret <vscale x 16 x i1> %ret
+}
+
+; Check that a non-zero index prevents optimization
+define <vscale x 16 x i1> @pred_load_neg3(<4 x i8>* %addr) #1 {
+; CHECK-LABEL: @pred_load_neg3(
+; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
+ %load = load <4 x i8>, <4 x i8>* %addr, align 4
+ %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 4)
+ %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
+ ret <vscale x 16 x i1> %ret
+}
+
+; Check that differing vscale min/max prevents optimization
+define <vscale x 16 x i1> @pred_load_neg4(<4 x i8>* %addr) #3 {
+; CHECK-LABEL: @pred_load_neg4(
+; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
+ %load = load <4 x i8>, <4 x i8>* %addr, align 4
+ %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
+ %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
+ ret <vscale x 16 x i1> %ret
+}
+
+; Check that insertion into a non-undef vector prevents optimization
+define <vscale x 16 x i1> @pred_load_neg5(<4 x i8>* %addr, <vscale x 2 x i8> %passthru) #1 {
+; CHECK-LABEL: @pred_load_neg5(
+; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
+ %load = load <4 x i8>, <4 x i8>* %addr, align 4
+ %insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %load, i64 0)
+ %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
+ ret <vscale x 16 x i1> %ret
+}
+
+declare <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8>, <2 x i8>, i64)
+declare <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8>, <4 x i8>, i64)
+declare <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8>, <8 x i8>, i64)
+
+attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
+attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
+attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
+attributes #3 = { "target-features"="+sve" vscale_range(2,4) }