[libc-commits] [libc] b254a2a - [libc][automemcpy] Add mean/variance and simplify implementation
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Thu Feb 17 04:11:23 PST 2022
Author: Guillaume Chatelet
Date: 2022-02-17T12:11:05Z
New Revision: b254a2a703407468ef471630d9dd7b0667d45229
URL: https://github.com/llvm/llvm-project/commit/b254a2a703407468ef471630d9dd7b0667d45229
DIFF: https://github.com/llvm/llvm-project/commit/b254a2a703407468ef471630d9dd7b0667d45229.diff
LOG: [libc][automemcpy] Add mean/variance and simplify implementation
Differential Revision: https://reviews.llvm.org/D120031
Added:
Modified:
libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
Removed:
################################################################################
diff --git a/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h b/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
index 845c3e1e1180f..9b861c6250611 100644
--- a/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
+++ b/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
@@ -49,9 +49,12 @@ struct FunctionId {
};
struct PerDistributionData {
- double MedianBytesPerSecond; // Median of samples for this distribution.
- double Score; // Normalized score for this distribution.
- Grade::GradeEnum Grade; // Grade for this distribution.
+ std::vector<double> BytesPerSecondSamples;
+ double BytesPerSecondMedian; // Median of samples for this distribution.
+ double BytesPerSecondMean; // Mean of samples for this distribution.
+ double BytesPerSecondVariance; // Variance of samples for this distribution.
+ double Score; // Normalized score for this distribution.
+ Grade::GradeEnum Grade; // Grade for this distribution.
};
struct FunctionData {
diff --git a/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp b/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
index 00298f69f77f6..ed9cd1f286c2c 100644
--- a/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
+++ b/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
@@ -76,29 +76,48 @@ Grade::GradeEnum Grade::judge(double Score) {
return BAD;
}
+static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
+ const double SampleMean) {
+ assert(!Samples.empty());
+ if (Samples.size() == 1)
+ return 0;
+ double DiffSquaresSum = 0;
+ for (const double S : Samples) {
+ const double Diff = S - SampleMean;
+ DiffSquaresSum += Diff * Diff;
+ }
+ return DiffSquaresSum / (Samples.size() - 1);
+}
+
+static void processPerDistributionData(PerDistributionData &Data) {
+ auto &Samples = Data.BytesPerSecondSamples;
+ assert(!Samples.empty());
+ // Sample Mean
+ const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
+ Data.BytesPerSecondMean = Sum / Samples.size();
+ // Unbiased Sample Variance
+ Data.BytesPerSecondVariance =
+ computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
+ // Median
+ const size_t HalfSize = Samples.size() / 2;
+ std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
+ Data.BytesPerSecondMedian = Samples[HalfSize];
+}
+
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
- std::unordered_map<SampleId, std::vector<double>, SampleId::Hasher>
- BucketedSamples;
- for (const auto &S : Samples)
- BucketedSamples[S.Id].push_back(S.BytesPerSecond);
- std::unordered_map<FunctionId, StringMap<double>, FunctionId::Hasher>
- Throughputs;
- for (auto &Pair : BucketedSamples) {
- const auto &Id = Pair.first;
- auto &Values = Pair.second;
- const size_t HalfSize = Values.size() / 2;
- std::nth_element(Values.begin(), Values.begin() + HalfSize, Values.end());
- const double MedianValue = Values[HalfSize];
- Throughputs[Id.Function][Id.Distribution.Name] = MedianValue;
+ std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
+ for (const auto &S : Samples) {
+ auto &Function = Functions[S.Id.Function];
+ auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
+ Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
}
+
std::vector<FunctionData> Output;
- for (auto &Pair : Throughputs) {
- FunctionData Data;
- Data.Id = Pair.first;
- for (const auto &Pair : Pair.second)
- Data.PerDistributionData[Pair.getKey()].MedianBytesPerSecond =
- Pair.getValue();
- Output.push_back(std::move(Data));
+ for (auto &[FunctionId, Function] : Functions) {
+ Function.Id = FunctionId;
+ for (auto &Pair : Function.PerDistributionData)
+ processPerDistributionData(Pair.second);
+ Output.push_back(std::move(Function));
}
return Output;
}
@@ -130,7 +149,7 @@ void fillScores(MutableArrayRef<FunctionData> Functions) {
const FunctionType Type = Function.Id.Type;
for (const auto &Pair : Function.PerDistributionData) {
const auto &Distribution = Pair.getKey();
- const double Throughput = Pair.getValue().MedianBytesPerSecond;
+ const double Throughput = Pair.getValue().BytesPerSecondMedian;
const Key K{Type, Distribution};
ThroughputMinMax[K].update(Throughput);
}
@@ -140,7 +159,7 @@ void fillScores(MutableArrayRef<FunctionData> Functions) {
const FunctionType Type = Function.Id.Type;
for (const auto &Pair : Function.PerDistributionData) {
const auto &Distribution = Pair.getKey();
- const double Throughput = Pair.getValue().MedianBytesPerSecond;
+ const double Throughput = Pair.getValue().BytesPerSecondMedian;
const Key K{Type, Distribution};
Function.PerDistributionData[Distribution].Score =
ThroughputMinMax[K].normalize(Throughput);
diff --git a/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp b/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
index bce508d17acbd..56f7bbf3d5f80 100644
--- a/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
+++ b/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
@@ -10,6 +10,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+using testing::DoubleNear;
using testing::ElementsAre;
using testing::Pair;
using testing::SizeIs;
@@ -31,8 +32,10 @@ TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsOneSample) {
EXPECT_THAT(Data[0].Id, Foo1);
EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
// A single value is provided.
- EXPECT_THAT(
- Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 4);
+ const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
+ EXPECT_THAT(DistributionData.BytesPerSecondMedian, 4);
+ EXPECT_THAT(DistributionData.BytesPerSecondMean, 4);
+ EXPECT_THAT(DistributionData.BytesPerSecondVariance, 0);
}
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
@@ -48,8 +51,10 @@ TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
// When multiple values are provided we pick the median one (here median of 4,
// 5, 5).
- EXPECT_THAT(
- Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 5);
+ const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
+ EXPECT_THAT(DistributionData.BytesPerSecondMedian, 5);
+ EXPECT_THAT(DistributionData.BytesPerSecondMean, DoubleNear(4.6, 0.1));
+ EXPECT_THAT(DistributionData.BytesPerSecondVariance, DoubleNear(0.33, 0.01));
}
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsServeralFunctionAndDist) {
@@ -86,11 +91,11 @@ TEST(AutomemcpyJsonResultsAnalyzer, getScore) {
[](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; });
EXPECT_THAT(Data[0].Id, Foo1);
- EXPECT_THAT(Data[0].PerDistributionData.lookup("A").MedianBytesPerSecond, 1);
+ EXPECT_THAT(Data[0].PerDistributionData.lookup("A").BytesPerSecondMedian, 1);
EXPECT_THAT(Data[1].Id, Foo2);
- EXPECT_THAT(Data[1].PerDistributionData.lookup("A").MedianBytesPerSecond, 2);
+ EXPECT_THAT(Data[1].PerDistributionData.lookup("A").BytesPerSecondMedian, 2);
EXPECT_THAT(Data[2].Id, Foo3);
- EXPECT_THAT(Data[2].PerDistributionData.lookup("A").MedianBytesPerSecond, 3);
+ EXPECT_THAT(Data[2].PerDistributionData.lookup("A").BytesPerSecondMedian, 3);
// Normalizes throughput per distribution.
fillScores(Data);
More information about the libc-commits mailing list