Skip to content

Commit 194e653

Browse files
committed
feat: add AddHash to CardinalityEstimator to allow use of precomputed hashes
feat: add benchmarks for AddHash
1 parent f527600 commit 194e653

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

CardinalityEstimation.Benchmark/Program.cs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,43 @@
44
using BenchmarkDotNet.Jobs;
55
using BenchmarkDotNet.Running;
66
using CardinalityEstimation;
7+
using System.Text;
78

89
// One job per target runtime, so every benchmark class runs on both .NET 8 and .NET 9.
var core80Job = Job.Default.WithId("Core80").WithRuntime(CoreRuntime.Core80);
var core90Job = Job.Default.WithId("Core90").WithRuntime(CoreRuntime.Core90);

var config = DefaultConfig.Instance
    .AddJob(core80Job)
    .AddJob(core90Job);

// Run each benchmark suite with the shared configuration.
BenchmarkRunner.Run<AddHash>(config);
BenchmarkRunner.Run<DifferentHashes>(config);
1315

16+
/// <summary>
/// Benchmarks <c>CardinalityEstimator.AddHash</c> by feeding it hashes that were
/// computed ahead of time, outside the measured method.
/// </summary>
[MemoryDiagnoser]
public class AddHash
{
    public static readonly Random Rand = new Random();

    private const int N = 10000000;

    // Filled once per instance by the field initializer, so hashing cost is not
    // part of the measured Run() body.
    private ulong[] dataHashes = CreateDataHashes();

    /// <summary>
    /// Number of bits handed to the <c>CardinalityEstimator</c> constructor.
    /// </summary>
    [Params(4, 16)]
    public int Bits { get; set; }

    /// <summary>
    /// Benchmark entry point. The supplied hash delegate throws if it is ever invoked;
    /// presumably AddHash is expected never to call it.
    /// </summary>
    [Benchmark]
    public void Run() => Run(Bits, _ => throw new InvalidProgramException());

    /// <summary>
    /// Builds N hashes: each one is the first 8 bytes of the XxHash128 digest of the
    /// UTF-8 encoding of a random string (random int + new GUID + random int).
    /// </summary>
    private static ulong[] CreateDataHashes()
    {
        var hashes = new ulong[N];
        for (var index = 0; index < hashes.Length; index++)
        {
            var prefix = Rand.Next().ToString();
            var middle = Guid.NewGuid().ToString();
            var suffix = Rand.Next().ToString();

            var payload = Encoding.UTF8.GetBytes(prefix + middle + suffix);
            var digest = System.IO.Hashing.XxHash128.Hash(payload);
            hashes[index] = BitConverter.ToUInt64(digest);
        }

        return hashes;
    }

    /// <summary>
    /// Creates a fresh estimator and adds every precomputed hash to it.
    /// </summary>
    /// <param name="bits">Bit count passed to the estimator constructor</param>
    /// <param name="hashFunction">Hash delegate passed to the estimator constructor</param>
    private void Run(int bits, GetHashCodeDelegate hashFunction)
    {
        var estimator = new CardinalityEstimator(hashFunction, bits);
        foreach (var hash in dataHashes)
        {
            estimator.AddHash(hash);
        }
    }
}
43+
1444
[MemoryDiagnoser]
1545
public class DifferentHashes
1646
{

CardinalityEstimation/CardinalityEstimator.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,18 @@ public bool Add(ReadOnlyMemory<byte> element)
469469
return changed;
470470
}
471471

472+
/// <summary>
/// Adds an already-computed hash code to the counted set, for callers that
/// hold precomputed hashes of their elements.
/// </summary>
/// <param name="hashCode">Precomputed hash code of the element being added</param>
/// <returns>True if the estimator's state was modified, false otherwise</returns>
public bool AddHash(ulong hashCode)
{
    // Register the hash first; the addition counter is bumped only after that call returns.
    var stateModified = AddElementHash(hashCode);
    CountAdditions++;
    return stateModified;
}
483+
472484
/// <summary>
473485
/// Returns the estimated number of items in the estimator
474486
/// </summary>

0 commit comments

Comments
 (0)