Skip to content

Commit 3a7e8df

Browse files
perf: Establish BulkCopyAsync performance baseline
The generic `BulkCopyToAsync` method, even with optimizations, showed significant overhead from its configuration and abstraction layers when compared to other high-performance EF Core extensions. This commit establishes a "best-case scenario" baseline by:

- Rewriting the primary benchmark to use a direct, hard-coded Npgsql binary copy, bypassing the generic helper method entirely.
- Updating the benchmark project to use direct project references instead of NuGet packages, streamlining the development cycle.
- Refining benchmark parameters for more accurate measurements.

This provides a clear target for future optimizations of the generic helper method.
1 parent 62d33ed commit 3a7e8df

File tree

18 files changed

+402
-180
lines changed

18 files changed

+402
-180
lines changed

CmdScale.EntityFrameworkCore.TimescaleDB.Benchmarks/BulkCopyToAsyncBenchmarks.cs

Lines changed: 0 additions & 67 deletions
This file was deleted.

CmdScale.EntityFrameworkCore.TimescaleDB.Benchmarks/CmdScale.EntityFrameworkCore.TimescaleDB.Benchmarks.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
<ItemGroup>
1515
<PackageReference Include="BenchmarkDotNet" Version="0.15.2" />
16+
<PackageReference Include="Testcontainers.PostgreSql" Version="4.7.0" />
17+
<PackageReference Include="Z.EntityFramework.Extensions.EFCore" Version="9.103.9.3" />
1618
</ItemGroup>
1719

1820
<ItemGroup>

CmdScale.EntityFrameworkCore.TimescaleDB.Benchmarks/README.md

Lines changed: 2 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,59 +7,9 @@ This project uses **BenchmarkDotNet** to measure the performance of high-through
77
## Prerequisites
88

99
- .NET 8 SDK or later
10-
- Docker and Docker Compose
10+
- Docker
1111

12-
---
13-
14-
## Step 1: Start a Clean Database
15-
16-
The benchmarks require a running TimescaleDB instance. A `docker-compose.yml` file is provided in the project root to simplify this process.
17-
18-
### 🔄 Stop and Reset (if needed)
19-
20-
To ensure you start with a clean slate, run this command to stop any running containers and permanently delete all existing data.
21-
22-
```bash
23-
docker compose down -v
24-
```
25-
26-
### ▶️ Start the Database
27-
28-
Launch a new TimescaleDB instance in the background.
29-
30-
```bash
31-
docker compose up -d
32-
```
33-
34-
### ✅ Verify Connection String
35-
36-
Ensure the connection string in `BulkCopyToAsyncBenchmarks.cs` matches the settings in your `docker-compose.yml` file (the default should work).
37-
38-
---
39-
40-
## Step 2: Apply Migrations
41-
42-
Next, create the necessary tables in the new database using EF Core migrations.
43-
44-
### ➕ Add a Migration
45-
46-
Create a new migration. Run this from the root of the solution.
47-
48-
```bash
49-
dotnet ef migrations add <YourMigrationName> --project CmdScale.EntityFrameworkCore.TimescaleDB.Example.DataAccess --startup-project CmdScale.EntityFrameworkCore.TimescaleDB.Example
50-
```
51-
52-
### ⬆️ Update the Database
53-
54-
Apply the migrations to create the hypertable schema.
55-
56-
```bash
57-
dotnet ef database update --project CmdScale.EntityFrameworkCore.TimescaleDB.Example.DataAccess --startup-project CmdScale.EntityFrameworkCore.TimescaleDB.Example
58-
```
59-
60-
---
61-
62-
## Step 3: Run the Benchmarks
12+
## Run the Benchmarks
6313

6414
Once the database is set up, you can run the performance tests.
6515

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
using BenchmarkDotNet.Attributes;
using CmdScale.EntityFrameworkCore.TimescaleDB.Example.DataAccess;
using Microsoft.EntityFrameworkCore;
using Testcontainers.PostgreSql;

namespace CmdScale.EntityFrameworkCore.TimescaleDB.Benchmarks
{
    /// <summary>
    /// Shared scaffolding for write-throughput benchmarks: starts a disposable
    /// TimescaleDB container, applies EF Core migrations once per run, and
    /// regenerates a fresh in-memory data set (and truncates the target table)
    /// before every iteration.
    /// </summary>
    /// <typeparam name="T">The entity type written by the concrete benchmark.</typeparam>
    public abstract class WriteRecordsBenchmarkBase<T> where T : class
    {
        // NOTE(review): derived benchmarks re-declare these with `new` + [Params].
        // BenchmarkDotNet then populates the *derived* members, so these base
        // fields stay 0 unless the derived member writes the value through to
        // them (see WriteRecordsKeylessBenchmarks) — confirm for each subclass.
        public int NumberOfRecords;
        public int MaxBatchSize;
        public int NumberOfWorkers;

        private readonly PostgreSqlContainer _dbContainer = new PostgreSqlBuilder()
            .WithImage("timescale/timescaledb:latest-pg17")
            .WithDatabase("benchmark_tests_db")
            .WithUsername("test_user")
            .WithPassword("test_password")
            .Build();

        protected string ConnectionString = "";
        protected readonly List<T> Trades = [];
        protected TimescaleContext? Context;

        /// <summary>
        /// Starts the database container, builds the DbContext, and applies
        /// EF Core migrations. Runs once before all iterations.
        /// </summary>
        [GlobalSetup]
        public async Task Setup()
        {
            await _dbContainer.StartAsync();
            ConnectionString = _dbContainer.GetConnectionString();

            DbContextOptionsBuilder<TimescaleContext> optionsBuilder = new();
            optionsBuilder.UseNpgsql(ConnectionString).UseTimescaleDb();
            Context = new TimescaleContext(optionsBuilder.Options);

            await Context.Database.MigrateAsync();
            Console.WriteLine("Migration applied successfully.");
        }

        /// <summary>
        /// Disposes the DbContext and tears the container down. Runs once after
        /// all iterations.
        /// </summary>
        [GlobalCleanup]
        public async Task GlobalCleanup()
        {
            // Dispose the context first so its connections are released before
            // the server behind them disappears with the container.
            if (Context is not null)
            {
                await Context.DisposeAsync();
                Context = null;
            }

            await _dbContainer.DisposeAsync();
        }

        /// <summary>
        /// Regenerates <see cref="Trades"/> with <see cref="NumberOfRecords"/>
        /// synthetic records and truncates the target table so every iteration
        /// starts from a clean slate.
        /// </summary>
        [IterationSetup]
        public void IterationSetup()
        {
            Trades.Clear();
            var random = new Random();
            string[] tickers = ["AAPL", "GOOGL", "MSFT", "TSLA", "AMZN"];
            var baseTimestamp = DateTime.UtcNow.AddMinutes(-30);

            for (int i = 0; i < NumberOfRecords; i++)
            {
                var trade = CreateTradeInstance(i, baseTimestamp, tickers[random.Next(tickers.Length)], random);
                Trades.Add(trade);
            }

            // Truncate the table before each iteration for a clean slate.
            // The table name comes from the derived benchmark, not user input,
            // so building the SQL string here is safe.
            string tableName = GetTableName();
            string sql = $"TRUNCATE TABLE \"{tableName}\"";
            Context!.Database.ExecuteSqlRaw(sql);
        }

        /// <summary>Builds one synthetic record for the given index.</summary>
        protected abstract T CreateTradeInstance(int index, DateTime baseTimestamp, string ticker, Random random);

        /// <summary>Unquoted name of the table the records are written to.</summary>
        protected abstract string GetTableName();
    }
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
using BenchmarkDotNet.Attributes;
using CmdScale.EntityFrameworkCore.TimescaleDB.Abstractions;
using CmdScale.EntityFrameworkCore.TimescaleDB.Example.DataAccess.Models;
using Npgsql;
using NpgsqlTypes;

namespace CmdScale.EntityFrameworkCore.TimescaleDB.Benchmarks
{
    /// <summary>
    /// Compares three bulk-write strategies for the keyless <see cref="Trade"/>
    /// hypertable: the generic <c>BulkCopyAsync</c> helper, a hard-coded Npgsql
    /// binary COPY (the "best-case" baseline), and Z.EntityFramework's
    /// <c>BulkInsertOptimizedAsync</c>.
    /// </summary>
    [Config(typeof(InProcessConfig))]
    [MemoryDiagnoser]
    [ThreadingDiagnoser]
    public class WriteRecordsKeylessBenchmarks : WriteRecordsBenchmarkBase<Trade>
    {
        // The [Params] members hide the base fields; they must write the value
        // through to the base, because the base class's IterationSetup reads
        // the *base* fields. Plain `new` fields left the base fields at 0, so
        // no records were ever generated.
        [Params(100_000, 500_000)]
        public new int NumberOfRecords
        {
            get => base.NumberOfRecords;
            set => base.NumberOfRecords = value;
        }

        [Params(25_000, 50_000, 100_000)]
        public new int MaxBatchSize
        {
            get => base.MaxBatchSize;
            set => base.MaxBatchSize = value;
        }

        [Params(8)]
        public new int NumberOfWorkers
        {
            get => base.NumberOfWorkers;
            set => base.NumberOfWorkers = value;
        }

        /// <summary>Builds one synthetic trade, spacing timestamps 1µs apart.</summary>
        protected override Trade CreateTradeInstance(int index, DateTime baseTimestamp, string ticker, Random random)
        {
            return new Trade
            {
                Timestamp = baseTimestamp.AddMicroseconds(index),
                Ticker = ticker,
                Price = (decimal)(100 + random.NextDouble() * 400),
                Size = random.Next(1, 100)
            };
        }

        protected override string GetTableName() => "Trades";

        /// <summary>Writes all records via the generic library helper.</summary>
        [Benchmark]
        public async Task BulkCopyAsync()
        {
            TimescaleCopyConfig<Trade> config = new TimescaleCopyConfig<Trade>()
                .ToTable(GetTableName())
                .WithWorkers(NumberOfWorkers)
                .WithBatchSize(MaxBatchSize);

            await Trades.BulkCopyAsync(ConnectionString, config);
        }

        /// <summary>
        /// Best-case baseline: splits the data across <see cref="NumberOfWorkers"/>
        /// parallel connections, each streaming batches with a binary COPY.
        /// </summary>
        [Benchmark]
        public async Task HardcodedBulkCopyAsync()
        {
            int totalRecords = Trades.Count;
            int workerChunkSize = (int)Math.Ceiling((double)totalRecords / NumberOfWorkers);

            // Local list: a field would accumulate stale tasks across invocations.
            List<Task> tasks = [];

            for (int i = 0; i < NumberOfWorkers; i++)
            {
                int startIndex = i * workerChunkSize;
                int currentWorkerDataSize = Math.Min(workerChunkSize, totalRecords - startIndex);

                if (currentWorkerDataSize <= 0)
                {
                    break;
                }

                List<Trade> workerData = [.. Trades.Skip(startIndex).Take(currentWorkerDataSize)];
                tasks.Add(Task.Run(async () =>
                {
                    // Open a dedicated connection per worker.
                    await using NpgsqlConnection connection = new(ConnectionString);
                    await connection.OpenAsync();

                    // Binary COPY of the five columns written below, in write
                    // order. Column names assume EF's default mapping (column
                    // name == property name) — TODO confirm against the model.
                    string copyCommand = "COPY \"Trades\" (\"Timestamp\", \"Ticker\", \"Price\", \"Size\", \"Exchange\") FROM STDIN (FORMAT BINARY)";

                    for (int j = 0; j < workerData.Count; j += MaxBatchSize)
                    {
                        List<Trade> currentBatch = [.. workerData.Skip(j).Take(MaxBatchSize)];

                        // Start a binary import stream per batch.
                        await using NpgsqlBinaryImporter writer = connection.BeginBinaryImport(copyCommand);
                        foreach (Trade item in currentBatch)
                        {
                            // Npgsql requires StartRow before each row's values.
                            await writer.StartRowAsync();
                            await writer.WriteAsync(item.Timestamp, NpgsqlDbType.TimestampTz);
                            await writer.WriteAsync(item.Ticker, NpgsqlDbType.Text);
                            await writer.WriteAsync(item.Price, NpgsqlDbType.Numeric);
                            await writer.WriteAsync(item.Size, NpgsqlDbType.Integer);
                            await writer.WriteAsync(item.Exchange, NpgsqlDbType.Text);
                        }

                        // Commit the COPY; without this the batch is discarded.
                        await writer.CompleteAsync();
                    }
                }));
            }

            await Task.WhenAll(tasks);
        }

        /// <summary>Third-party comparison: Z.EntityFramework optimized insert, batched.</summary>
        [Benchmark]
        public async Task BatchedBulkInsertOptimizedAsync()
        {
            foreach (Trade[] chunk in Trades.Chunk(MaxBatchSize))
            {
                await Context!.BulkInsertOptimizedAsync(chunk.ToList());
            }
        }
    }
}

0 commit comments

Comments
 (0)