Skip to content

Commit 648356c

Browse files
authored
caching bucket for parquet chunks file (#6805)
* caching bucket for parquet chunks file
  Signed-off-by: yeya24 <[email protected]>
* update parquet common version
  Signed-off-by: yeya24 <[email protected]>
* use empty serieset instead of nil
  Signed-off-by: yeya24 <[email protected]>
* changelog
  Signed-off-by: yeya24 <[email protected]>
---------
Signed-off-by: yeya24 <[email protected]>
1 parent 8ad8c44 commit 648356c

File tree

9 files changed

+54
-19
lines changed

9 files changed

+54
-19
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
* [ENHANCEMENT] Ingester: Handle runtime errors in query path #6769
3939
* [ENHANCEMENT] Compactor: Support metadata caching bucket for Cleaner. Can be enabled via `-compactor.cleaner-caching-bucket-enabled` flag. #6778
4040
* [ENHANCEMENT] Compactor, Store Gateway: Introduce user scanner strategy and user index. #6780
41+
* [ENHANCEMENT] Querier: Support chunks cache for parquet queryable. #6805
4142
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517
4243
* [BUGFIX] Ingester: Fix labelset data race condition. #6573
4344
* [BUGFIX] Compactor: Cleaner should not put deletion marker for blocks with no-compact marker. #6576

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ require (
8282
github.com/hashicorp/golang-lru/v2 v2.0.7
8383
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822
8484
github.com/parquet-go/parquet-go v0.25.1
85-
github.com/prometheus-community/parquet-common v0.0.0-20250606162055-b81ebb7e1b96
85+
github.com/prometheus-community/parquet-common v0.0.0-20250610002942-dfd72bae1309
8686
github.com/prometheus/procfs v0.16.1
8787
github.com/sercand/kuberesolver/v5 v5.1.1
8888
github.com/tjhop/slog-gokit v0.1.4

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1543,8 +1543,8 @@ github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndr
15431543
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
15441544
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
15451545
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
1546-
github.com/prometheus-community/parquet-common v0.0.0-20250606162055-b81ebb7e1b96 h1:5EbDNJOxTWGpe6yzXdgcBCU63BRSrRAh0Q1oB5AVyoA=
1547-
github.com/prometheus-community/parquet-common v0.0.0-20250606162055-b81ebb7e1b96/go.mod h1:MwYpD+FKot7LWBMFaPS6FeM8oqo77u5erRlNkSSFPA0=
1546+
github.com/prometheus-community/parquet-common v0.0.0-20250610002942-dfd72bae1309 h1:xGnXldBSTFPopLYi7ce+kJb+A1h1mPTeF4SLlRTEek0=
1547+
github.com/prometheus-community/parquet-common v0.0.0-20250610002942-dfd72bae1309/go.mod h1:MwYpD+FKot7LWBMFaPS6FeM8oqo77u5erRlNkSSFPA0=
15481548
github.com/prometheus-community/prom-label-proxy v0.11.0 h1:IO02WiiFMfcIqvjhwMbCYnDJiTNcSHBrkCGRQ/7KDd0=
15491549
github.com/prometheus-community/prom-label-proxy v0.11.0/go.mod h1:lfvrG70XqsxWDrSh1843QXBG0fSg8EbIXmAo8xGsvw8=
15501550
github.com/prometheus/alertmanager v0.28.1 h1:BK5pCoAtaKg01BYRUJhEDV1tqJMEtYBGzPw8QdvnnvA=

pkg/querier/blocks_store_queryable.go

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -186,19 +186,10 @@ func NewBlocksStoreQueryable(
186186
func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegateway.Config, storageCfg cortex_tsdb.BlocksStorageConfig, limits BlocksStoreLimits, logger log.Logger, reg prometheus.Registerer) (*BlocksStoreQueryable, error) {
187187
var stores BlocksStoreSet
188188

189-
bucketClient, err := bucket.NewClient(context.Background(), storageCfg.Bucket, gatewayCfg.HedgedRequest.GetHedgedRoundTripper(), "querier", logger, reg)
189+
bucketClient, err := createCachingBucketClient(context.Background(), storageCfg, gatewayCfg.HedgedRequest.GetHedgedRoundTripper(), "querier", logger, reg)
190190
if err != nil {
191-
return nil, errors.Wrap(err, "failed to create bucket client")
191+
return nil, err
192192
}
193-
194-
// Blocks finder doesn't use chunks, but we pass config for consistency.
195-
matchers := cortex_tsdb.NewMatchers()
196-
cachingBucket, err := cortex_tsdb.CreateCachingBucket(storageCfg.BucketStore.ChunksCache, storageCfg.BucketStore.MetadataCache, matchers, bucketClient, logger, extprom.WrapRegistererWith(prometheus.Labels{"component": "querier"}, reg))
197-
if err != nil {
198-
return nil, errors.Wrap(err, "create caching bucket")
199-
}
200-
bucketClient = cachingBucket
201-
202193
// Create the blocks finder.
203194
var finder BlocksFinder
204195
if storageCfg.BucketStore.BucketIndex.Enabled {

pkg/querier/bucket.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package querier
2+
3+
import (
4+
"context"
5+
"net/http"
6+
7+
"github.com/go-kit/log"
8+
"github.com/pkg/errors"
9+
"github.com/prometheus/client_golang/prometheus"
10+
"github.com/thanos-io/objstore"
11+
"github.com/thanos-io/thanos/pkg/extprom"
12+
13+
"github.com/cortexproject/cortex/pkg/storage/bucket"
14+
cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
15+
)
16+
17+
func createCachingBucketClient(ctx context.Context, storageCfg cortex_tsdb.BlocksStorageConfig, hedgedRoundTripper func(rt http.RoundTripper) http.RoundTripper, name string, logger log.Logger, reg prometheus.Registerer) (objstore.InstrumentedBucket, error) {
18+
bucketClient, err := bucket.NewClient(ctx, storageCfg.Bucket, hedgedRoundTripper, name, logger, reg)
19+
if err != nil {
20+
return nil, errors.Wrap(err, "failed to create bucket client")
21+
}
22+
23+
// The chunks cache config is passed so the parquet queryable can cache parquet chunks files; the blocks finder doesn't read chunks and receives it only for consistency.
24+
matchers := cortex_tsdb.NewMatchers()
25+
cachingBucket, err := cortex_tsdb.CreateCachingBucket(storageCfg.BucketStore.ChunksCache, storageCfg.BucketStore.MetadataCache, matchers, bucketClient, logger, extprom.WrapRegistererWith(prometheus.Labels{"component": name}, reg))
26+
if err != nil {
27+
return nil, errors.Wrap(err, "create caching bucket")
28+
}
29+
bucketClient = cachingBucket
30+
return bucketClient, nil
31+
}

pkg/querier/parquet_queryable.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,11 @@ func NewParquetQueryable(
115115
logger log.Logger,
116116
reg prometheus.Registerer,
117117
) (storage.Queryable, error) {
118-
bucketClient, err := bucket.NewClient(context.Background(), storageCfg.Bucket, nil, "parquet-querier", logger, reg)
119-
118+
bucketClient, err := createCachingBucketClient(context.Background(), storageCfg, nil, "parquet-querier", logger, reg)
120119
if err != nil {
121120
return nil, err
122121
}
122+
123123
manager, err := services.NewManager(blockStorageQueryable)
124124
if err != nil {
125125
return nil, err
@@ -400,7 +400,7 @@ func (q *parquetQuerierWithFallback) Select(ctx context.Context, sortSeries bool
400400
hints.End = maxt
401401

402402
if maxt < mint {
403-
return nil
403+
return storage.EmptySeriesSet()
404404
}
405405

406406
remaining, parquet, err := q.getBlocks(ctx, mint, maxt)

pkg/storage/tsdb/caching_bucket.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ func CreateCachingBucket(chunksConfig ChunksCacheConfig, metadataConfig Metadata
215215
cachingConfigured = true
216216
chunksCache = cache.NewTracingCache(chunksCache)
217217
cfg.CacheGetRange("chunks", chunksCache, matchers.GetChunksMatcher(), chunksConfig.SubrangeSize, chunksConfig.AttributesTTL, chunksConfig.SubrangeTTL, chunksConfig.MaxGetRangeRequests)
218+
cfg.CacheGetRange("parquet-chunks", chunksCache, matchers.GetParquetChunksMatcher(), chunksConfig.SubrangeSize, chunksConfig.AttributesTTL, chunksConfig.SubrangeTTL, chunksConfig.MaxGetRangeRequests)
218219
}
219220

220221
metadataCache, err := createMetadataCache("metadata-cache", &metadataConfig.MetadataCacheBackend, logger, reg)
@@ -356,6 +357,7 @@ type Matchers struct {
356357
func NewMatchers() Matchers {
357358
matcherMap := make(map[string]func(string) bool)
358359
matcherMap["chunks"] = isTSDBChunkFile
360+
matcherMap["parquet-chunks"] = isParquetChunkFile
359361
matcherMap["metafile"] = isMetaFile
360362
matcherMap["block-index"] = isBlockIndexFile
361363
matcherMap["bucket-index"] = isBucketIndexFiles
@@ -375,6 +377,10 @@ func (m *Matchers) SetChunksMatcher(f func(string) bool) {
375377
m.matcherMap["chunks"] = f
376378
}
377379

380+
func (m *Matchers) SetParquetChunksMatcher(f func(string) bool) {
381+
m.matcherMap["parquet-chunks"] = f
382+
}
383+
378384
func (m *Matchers) SetBlockIndexMatcher(f func(string) bool) {
379385
m.matcherMap["block-index"] = f
380386
}
@@ -399,6 +405,10 @@ func (m *Matchers) GetChunksMatcher() func(string) bool {
399405
return m.matcherMap["chunks"]
400406
}
401407

408+
func (m *Matchers) GetParquetChunksMatcher() func(string) bool {
409+
return m.matcherMap["parquet-chunks"]
410+
}
411+
402412
func (m *Matchers) GetMetafileMatcher() func(string) bool {
403413
return m.matcherMap["metafile"]
404414
}
@@ -427,6 +437,8 @@ var chunksMatcher = regexp.MustCompile(`^.*/chunks/\d+$`)
427437

428438
// isTSDBChunkFile reports whether name matches the package-level
// chunksMatcher pattern, i.e. a path ending in "/chunks/<digits>".
func isTSDBChunkFile(name string) bool {
	return chunksMatcher.MatchString(name)
}
429439

440+
// isParquetChunkFile reports whether name ends with "chunks.parquet".
// The suffix is not anchored on a path separator, so any object name that
// ends with that text matches.
func isParquetChunkFile(name string) bool {
	const parquetChunksSuffix = "chunks.parquet"
	return strings.HasSuffix(name, parquetChunksSuffix)
}
441+
430442
func isMetaFile(name string) bool {
431443
return strings.HasSuffix(name, "/"+metadata.MetaFilename) || strings.HasSuffix(name, "/"+metadata.DeletionMarkFilename) || strings.HasSuffix(name, "/"+TenantDeletionMarkFile)
432444
}

vendor/github.com/prometheus-community/parquet-common/storage/bucket_read_at.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/modules.txt

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)