[llvm] [LV] Ignore some costs when loop gets fully unrolled (PR #106699)

Igor Kirillov via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 3 09:21:44 PST 2024


https://github.com/igogo-x86 updated https://github.com/llvm/llvm-project/pull/106699

>From 79cb34ce6bc29ed6a3f7a1dd2925931cfd39f596 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 30 Aug 2024 09:22:21 +0000
Subject: [PATCH 1/7] [LV] Ignore some costs when loop gets fully unrolled

When the VF has a fixed width and equals the number of loop iterations, and we
are not folding the tail by masking, the comparison instruction and the
induction operation will be removed by dead code elimination (DCE) later.
Ignoring the costs of these instructions improves the cost model.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3c7c044a042719..29a20a67fd783f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7248,6 +7248,26 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
         continue;
       IVInsts.push_back(CI);
     }
+
+    // If the given VF loop gets fully unrolled, ignore the costs of comparison
+    // and increment instruction, as they'll get simplified away
+    auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
+    auto *Cmp = OrigLoop->getLatchCmpInst();
+    if (Cmp && VF.isFixed() && VF.getFixedValue() == TC) {
+      CostCtx.SkipCostComputation.insert(Cmp);
+      for (Instruction *IVInst : IVInsts) {
+        bool IsSimplifiedAway = true;
+        for (auto *UIV : IVInst->users()) {
+          if (!Legal->isInductionVariable(UIV) && UIV != Cmp) {
+            IsSimplifiedAway = false;
+            break;
+          }
+        }
+        if (IsSimplifiedAway)
+          CostCtx.SkipCostComputation.insert(IVInst);
+      }
+    }
+
     for (Instruction *IVInst : IVInsts) {
       if (CostCtx.skipCostComputation(IVInst, VF.isVector()))
         continue;

>From 2549d0a45379b554fa4442d76f492aae62433773 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Wed, 2 Oct 2024 09:05:33 +0000
Subject: [PATCH 2/7] Update to pass assertion comparing two cost models

---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 36 +++++++++++--------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 29a20a67fd783f..af9c6aa3fa033a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5559,6 +5559,20 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
 InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
   InstructionCost Cost;
 
+  // If with the given VF loop gets fully unrolled, ignore the costs of
+  // comparison and induction instructions, as they'll get simplified away
+  SmallPtrSet<const Value *, 16> ValuesToIgnoreForVF;
+  auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
+  auto *Cmp = TheLoop->getLatchCmpInst();
+  if (Cmp && TC == VF.getKnownMinValue()) {
+    ValuesToIgnoreForVF.insert(Cmp);
+    for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
+      Instruction *IVInc = cast<Instruction>(
+          IV->getIncomingValueForBlock(TheLoop->getLoopLatch()));
+      ValuesToIgnoreForVF.insert(IVInc);
+    }
+  }
+
   // For each block.
   for (BasicBlock *BB : TheLoop->blocks()) {
     InstructionCost BlockCost;
@@ -5566,7 +5580,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
     // For each instruction in the old loop.
     for (Instruction &I : BB->instructionsWithoutDebug()) {
       // Skip ignored values.
-      if (ValuesToIgnore.count(&I) ||
+      if (ValuesToIgnore.count(&I) || ValuesToIgnoreForVF.count(&I) ||
           (VF.isVector() && VecValuesToIgnore.count(&I)))
         continue;
 
@@ -7249,22 +7263,16 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
       IVInsts.push_back(CI);
     }
 
-    // If the given VF loop gets fully unrolled, ignore the costs of comparison
-    // and increment instruction, as they'll get simplified away
+    // If with the given VF loop gets fully unrolled, ignore the costs of
+    // comparison and induction instructions, as they'll get simplified away
     auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
     auto *Cmp = OrigLoop->getLatchCmpInst();
-    if (Cmp && VF.isFixed() && VF.getFixedValue() == TC) {
+    if (Cmp && TC == VF.getKnownMinValue()) {
       CostCtx.SkipCostComputation.insert(Cmp);
-      for (Instruction *IVInst : IVInsts) {
-        bool IsSimplifiedAway = true;
-        for (auto *UIV : IVInst->users()) {
-          if (!Legal->isInductionVariable(UIV) && UIV != Cmp) {
-            IsSimplifiedAway = false;
-            break;
-          }
-        }
-        if (IsSimplifiedAway)
-          CostCtx.SkipCostComputation.insert(IVInst);
+      for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
+        Instruction *IVInc = cast<Instruction>(
+            IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
+        CostCtx.SkipCostComputation.insert(IVInc);
       }
     }
 

>From d2272da13be8287ebc07d6999e8a34b797ca5470 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 12 Nov 2024 13:51:21 +0000
Subject: [PATCH 3/7] Check that induction variable has no unsimplifiable users

Add AArch64 test
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 59 ++++++++++++-------
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index af9c6aa3fa033a..0d715e0ee07cb9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2652,6 +2652,33 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
   return I->second;
 }
 
+/// Knowing that loop \p L would be fully unrolled after vectorisation, add
+/// instructions that will get simplified and thus should not have any cost to
+/// \p InstsToIgnore
+static void AddFullyUnrolledInstructionsToIgnore(
+    Loop *L, const LoopVectorizationLegality::InductionList &IL,
+    SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
+  auto *Cmp = L->getLatchCmpInst();
+  if (!Cmp)
+    return;
+  InstsToIgnore.insert(Cmp);
+  for (const auto &[IV, IndDesc] : IL) {
+    // Get next iteration value of the induction variable
+    Instruction *IVInst =
+        cast<Instruction>(IV->getIncomingValueForBlock(L->getLoopLatch()));
+    bool IsSimplifiedAway = true;
+    // Check that this value used only to exit the loop
+    for (auto *UIV : IVInst->users()) {
+      if (UIV != IV && UIV != Cmp) {
+        IsSimplifiedAway = false;
+        break;
+      }
+    }
+    if (IsSimplifiedAway)
+      InstsToIgnore.insert(IVInst);
+  }
+}
+
 void InnerLoopVectorizer::createInductionResumeValues(
     const SCEV2ValueTy &ExpandedSCEVs,
     std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -5559,19 +5586,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
 InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
   InstructionCost Cost;
 
-  // If with the given VF loop gets fully unrolled, ignore the costs of
-  // comparison and induction instructions, as they'll get simplified away
-  SmallPtrSet<const Value *, 16> ValuesToIgnoreForVF;
+  // If with the given fixed width VF loop gets fully unrolled, ignore the costs
+  // of comparison and induction instructions, as they'll get simplified away
+  SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
   auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
-  auto *Cmp = TheLoop->getLatchCmpInst();
-  if (Cmp && TC == VF.getKnownMinValue()) {
-    ValuesToIgnoreForVF.insert(Cmp);
-    for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
-      Instruction *IVInc = cast<Instruction>(
-          IV->getIncomingValueForBlock(TheLoop->getLoopLatch()));
-      ValuesToIgnoreForVF.insert(IVInc);
-    }
-  }
+  if (VF.isFixed() && TC == VF.getFixedValue())
+    AddFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
+                                         ValuesToIgnoreForVF);
 
   // For each block.
   for (BasicBlock *BB : TheLoop->blocks()) {
@@ -7265,16 +7286,10 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
 
     // If with the given VF loop gets fully unrolled, ignore the costs of
     // comparison and induction instructions, as they'll get simplified away
-    auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
-    auto *Cmp = OrigLoop->getLatchCmpInst();
-    if (Cmp && TC == VF.getKnownMinValue()) {
-      CostCtx.SkipCostComputation.insert(Cmp);
-      for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
-        Instruction *IVInc = cast<Instruction>(
-            IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
-        CostCtx.SkipCostComputation.insert(IVInc);
-      }
-    }
+    auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
+    if (VF.isFixed() && TC == VF.getFixedValue())
+      AddFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
+                                           CostCtx.SkipCostComputation);
 
     for (Instruction *IVInst : IVInsts) {
       if (CostCtx.skipCostComputation(IVInst, VF.isVector()))

>From 1722aea1206c083f85964dcfce57ca1e20880ef3 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Fri, 15 Nov 2024 14:35:22 +0000
Subject: [PATCH 4/7] Addressing suggestions

* Fix comments
* Add more tests
* Remove the requirement that a latch compare instruction be present
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 42 +++++++++----------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0d715e0ee07cb9..823fd603554d65 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2652,29 +2652,21 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
   return I->second;
 }
 
-/// Knowing that loop \p L would be fully unrolled after vectorisation, add
-/// instructions that will get simplified and thus should not have any cost to
-/// \p InstsToIgnore
-static void AddFullyUnrolledInstructionsToIgnore(
+/// Knowing that loop \p L executes a single vector iteration, add instructions
+/// that will get simplified and thus should not have any cost to \p
+/// InstsToIgnore.
+static void addFullyUnrolledInstructionsToIgnore(
     Loop *L, const LoopVectorizationLegality::InductionList &IL,
     SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
   auto *Cmp = L->getLatchCmpInst();
-  if (!Cmp)
-    return;
-  InstsToIgnore.insert(Cmp);
+  if (Cmp)
+    InstsToIgnore.insert(Cmp);
   for (const auto &[IV, IndDesc] : IL) {
-    // Get next iteration value of the induction variable
+    // Get next iteration value of the induction variable.
     Instruction *IVInst =
         cast<Instruction>(IV->getIncomingValueForBlock(L->getLoopLatch()));
-    bool IsSimplifiedAway = true;
-    // Check that this value used only to exit the loop
-    for (auto *UIV : IVInst->users()) {
-      if (UIV != IV && UIV != Cmp) {
-        IsSimplifiedAway = false;
-        break;
-      }
-    }
-    if (IsSimplifiedAway)
+    if (all_of(IVInst->users(),
+               [&](const User *U) { return U == IV || U == Cmp; }))
       InstsToIgnore.insert(IVInst);
   }
 }
@@ -5586,12 +5578,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
 InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
   InstructionCost Cost;
 
-  // If with the given fixed width VF loop gets fully unrolled, ignore the costs
-  // of comparison and induction instructions, as they'll get simplified away
+  // If the vector loop gets executed exactly once with the given VF, ignore the
+  // costs of comparison and induction instructions, as they'll get simplified
+  // away.
   SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
   auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
   if (VF.isFixed() && TC == VF.getFixedValue())
-    AddFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
+    addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
                                          ValuesToIgnoreForVF);
 
   // For each block.
@@ -7284,11 +7277,14 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
       IVInsts.push_back(CI);
     }
 
-    // If with the given VF loop gets fully unrolled, ignore the costs of
-    // comparison and induction instructions, as they'll get simplified away
+    // If the vector loop gets executed exactly once with the given VF, ignore
+    // the costs of comparison and induction instructions, as they'll get
+    // simplified away.
+    // TODO: Remove this code after stepping away from the legacy cost model and
+    // adding code to simplify VPlans before calculating their costs.
     auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
     if (VF.isFixed() && TC == VF.getFixedValue())
-      AddFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
+      addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
                                            CostCtx.SkipCostComputation);
 
     for (Instruction *IVInst : IVInsts) {

>From 6955f28e34b9306c5eeba4c5e32b01abafdd35e0 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 19 Nov 2024 12:19:27 +0000
Subject: [PATCH 5/7] Reduce cost only when not tail-folding

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 823fd603554d65..6cc177380ee9e2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5583,7 +5583,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
   // away.
   SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
   auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
-  if (VF.isFixed() && TC == VF.getFixedValue())
+  if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
     addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
                                          ValuesToIgnoreForVF);
 
@@ -7283,7 +7283,7 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
     // TODO: Remove this code after stepping away from the legacy cost model and
     // adding code to simplify VPlans before calculating their costs.
     auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
-    if (VF.isFixed() && TC == VF.getFixedValue())
+    if (VF.isFixed() && TC == VF.getFixedValue() && !CM.foldTailByMasking())
       addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
                                            CostCtx.SkipCostComputation);
 

>From 7bd5e396eb88faced594d5999026bdac022f5a17 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Mon, 2 Dec 2024 15:17:30 +0000
Subject: [PATCH 6/7] tmp

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6cc177380ee9e2..815f0c666ee20e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5583,9 +5583,11 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
   // away.
   SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
   auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
-  if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking())
+  if (VF.isFixed() && TC == VF.getFixedValue()) {
+    assert(!foldTailByMasking());
     addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
                                          ValuesToIgnoreForVF);
+  }
 
   // For each block.
   for (BasicBlock *BB : TheLoop->blocks()) {

>From b494e64cb81453a0bd5c3cf6c7b6b1b695c13f91 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <igor.kirillov at arm.com>
Date: Tue, 3 Dec 2024 16:59:11 +0000
Subject: [PATCH 7/7] Rebase and update after pre-committing tests

---
 .../LoopVectorize/AArch64/fully-unrolled-cost.ll   | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
index ab29bf8d2d52a3..c1ede2410238ca 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll
@@ -12,12 +12,10 @@ define i64 @test(ptr %a, ptr %b) #0 {
 ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost for VF 8: 26
-; CHECK-NEXT: Cost of 1 for VF 16: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 50
-; CHECK: LV: Selecting VF: vscale x 2
+; CHECK: Cost for VF 16: 48
+; CHECK: LV: Selecting VF: 16
 entry:
   br label %for.body
 
@@ -50,9 +48,8 @@ define i64 @test_external_iv_user(ptr %a, ptr %b) #0 {
 ; CHECK: Cost for VF 8: 26
 ; CHECK-NEXT: Cost of 1 for VF 16: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 50
+; CHECK: Cost for VF 16: 49
 ; CHECK: LV: Selecting VF: vscale x 2
 entry:
   br label %for.body
@@ -86,13 +83,10 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
 ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
 ; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK: Cost for VF 8: 27
-; CHECK-NEXT: Cost of 1 for VF 16: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: induction instruction   %j.iv.next = add nuw nsw i64 %j.iv, 1
 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
-; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
 ; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
-; CHECK: Cost for VF 16: 51
+; CHECK: Cost for VF 16: 48
 ; CHECK: LV: Selecting VF: 16
 entry:
   br label %for.body



More information about the llvm-commits mailing list