@@ -4,10 +4,10 @@ import (
44 "context"
55 "errors"
66 "flag"
7+ "strconv"
78
89 "github.com/go-kit/log"
910 "github.com/go-kit/log/level"
10- "github.com/grafana/dskit/ring"
1111 "github.com/prometheus/client_golang/prometheus"
1212 "github.com/prometheus/client_golang/prometheus/promauto"
1313 "github.com/twmb/franz-go/pkg/kgo"
@@ -16,50 +16,70 @@ import (
1616)
1717
// DataObjTeeConfig configures the data object tee.
type DataObjTeeConfig struct {
	// Enabled turns the tee on. When false, all other fields are ignored.
	Enabled bool `yaml:"enabled"`
	// Topic is the Kafka topic streams are duplicated to. Required when
	// the tee is enabled (see Validate).
	Topic string `yaml:"topic"`
	// MaxBufferedBytes is the maximum number of bytes to buffer.
	MaxBufferedBytes int `yaml:"max_buffered_bytes"`
	// PerPartitionRateBytes is the per-tenant partition rate (bytes/sec).
	PerPartitionRateBytes int `yaml:"per_partition_rate_bytes"`
	// DebugMetricsEnabled enables optional high-cardinality debug metrics
	// (see DataObjTee.produced).
	DebugMetricsEnabled bool `yaml:"debug_metrics_enabled"`
}
2325
2426func (c * DataObjTeeConfig ) RegisterFlags (f * flag.FlagSet ) {
2527 f .BoolVar (& c .Enabled , "distributor.dataobj-tee.enabled" , false , "Enable data object tee." )
2628 f .StringVar (& c .Topic , "distributor.dataobj-tee.topic" , "" , "Topic for data object tee." )
2729 f .IntVar (& c .MaxBufferedBytes , "distributor.dataobj-tee.max-buffered-bytes" , 100 << 20 , "Maximum number of bytes to buffer." )
30+ f .IntVar (& c .PerPartitionRateBytes , "distributor.dataobj-tee.per-partition-rate-bytes" , 1024 * 1024 , "The per-tenant partition rate (bytes/sec)." )
31+ f .BoolVar (& c .DebugMetricsEnabled , "distributor.dataobj-tee.debug-metrics-enabled" , false , "Enables optional debug metrics." )
2832}
2933
3034func (c * DataObjTeeConfig ) Validate () error {
31- if c .Enabled && c .Topic == "" {
35+ if ! c .Enabled {
36+ return nil
37+ }
38+ if c .Topic == "" {
3239 return errors .New ("the topic is required" )
3340 }
41+ if c .MaxBufferedBytes < 0 {
42+ return errors .New ("max buffered bytes cannot be negative" )
43+ }
44+ if c .PerPartitionRateBytes < 0 {
45+ return errors .New ("per partition rate bytes cannot be negative" )
46+ }
3447 return nil
3548}
3649
// DataObjTee is a tee that duplicates streams to the data object topic.
// It is a temporary solution while we work on segmentation keys.
type DataObjTee struct {
	cfg          *DataObjTeeConfig
	limitsClient *ingestLimits                  // used to refresh per-tenant rates in Duplicate
	kafkaClient  *kgo.Client                    // produces duplicated records to cfg.Topic
	resolver     *SegmentationPartitionResolver // maps segmentation key + rate to a partition
	logger       log.Logger

	// Metrics.
	failures prometheus.Counter // streams that could not be duplicated
	total    prometheus.Counter // streams attempted (incremented before any outcome is known)

	// High cardinality metrics which are only emitted when debug metrics
	// are enabled.
	produced *prometheus.CounterVec // bytes produced, by tenant/partition/segmentation_key
}
4967
5068// NewDataObjTee returns a new DataObjTee.
5169func NewDataObjTee (
5270 cfg * DataObjTeeConfig ,
53- client * kgo.Client ,
54- ringReader ring.PartitionRingReader ,
71+ resolver * SegmentationPartitionResolver ,
72+ limitsClient * ingestLimits ,
73+ kafkaClient * kgo.Client ,
5574 logger log.Logger ,
5675 reg prometheus.Registerer ,
5776) (* DataObjTee , error ) {
5877 return & DataObjTee {
59- cfg : cfg ,
60- client : client ,
61- ringReader : ringReader ,
62- logger : logger ,
78+ cfg : cfg ,
79+ resolver : resolver ,
80+ kafkaClient : kafkaClient ,
81+ limitsClient : limitsClient ,
82+ logger : logger ,
6383 failures : promauto .With (reg ).NewCounter (prometheus.CounterOpts {
6484 Name : "loki_distributor_dataobj_tee_duplicate_stream_failures_total" ,
6585 Help : "Total number of streams that could not be duplicated." ,
@@ -68,21 +88,54 @@ func NewDataObjTee(
6888 Name : "loki_distributor_dataobj_tee_duplicate_streams_total" ,
6989 Help : "Total number of streams duplicated." ,
7090 }),
91+ produced : promauto .With (reg ).NewCounterVec (prometheus.CounterOpts {
92+ Name : "loki_distributor_dataobj_tee_produced_bytes_total" ,
93+ Help : "Total number of bytes produced to each partition." ,
94+ }, []string {"tenant" , "partition" , "segmentation_key" }),
7195 }, nil
7296}
7397
// A SegmentedStream is a KeyedStream with a segmentation key.
type SegmentedStream struct {
	KeyedStream
	// SegmentationKey determines which partition the stream is produced to.
	SegmentationKey SegmentationKey
}
103+
74104// Duplicate implements the [Tee] interface.
75- func (t * DataObjTee ) Duplicate (_ context.Context , tenant string , streams []KeyedStream ) {
76- for _ , s := range streams {
77- go t .duplicate (tenant , s )
105+ func (t * DataObjTee ) Duplicate (ctx context.Context , tenant string , streams []KeyedStream ) {
106+ segmentationKeyStreams := make ([]SegmentedStream , 0 , len (streams ))
107+ for _ , stream := range streams {
108+ segmentationKey , err := GetSegmentationKey (stream )
109+ if err != nil {
110+ level .Error (t .logger ).Log ("msg" , "failed to get segmentation key" , "err" , err )
111+ t .failures .Inc ()
112+ return
113+ }
114+ segmentationKeyStreams = append (segmentationKeyStreams , SegmentedStream {
115+ KeyedStream : stream ,
116+ SegmentationKey : segmentationKey ,
117+ })
118+ }
119+ rates , err := t .limitsClient .UpdateRates (ctx , tenant , segmentationKeyStreams )
120+ if err != nil {
121+ level .Error (t .logger ).Log ("msg" , "failed to update rates" , "err" , err )
122+ }
123+ // fastRates is a temporary lookup table that lets us find the rate
124+ // for a segmentation key in constant time.
125+ fastRates := make (map [uint64 ]uint64 , len (rates ))
126+ for _ , rate := range rates {
127+ fastRates [rate .StreamHash ] = rate .Rate
128+ }
129+ for _ , s := range segmentationKeyStreams {
130+ go t .duplicate (ctx , tenant , s , fastRates [s .SegmentationKey .Sum64 ()])
78131 }
79132}
80133
81- func (t * DataObjTee ) duplicate (tenant string , stream KeyedStream ) {
134+ func (t * DataObjTee ) duplicate (ctx context. Context , tenant string , stream SegmentedStream , rateBytes uint64 ) {
82135 t .total .Inc ()
83- partition , err := t .ringReader . PartitionRing (). ActivePartitionForKey ( stream .HashKey )
136+ partition , err := t .resolver . Resolve ( ctx , stream .SegmentationKey , rateBytes )
84137 if err != nil {
85- level .Error (t .logger ).Log ("msg" , "failed to get partition" , "err" , err )
138+ level .Error (t .logger ).Log ("msg" , "failed to resolve partition" , "err" , err )
86139 t .failures .Inc ()
87140 return
88141 }
@@ -92,9 +145,16 @@ func (t *DataObjTee) duplicate(tenant string, stream KeyedStream) {
92145 t .failures .Inc ()
93146 return
94147 }
95- results := t .client .ProduceSync (context . TODO () , records ... )
148+ results := t .kafkaClient .ProduceSync (ctx , records ... )
96149 if err := results .FirstErr (); err != nil {
97150 level .Error (t .logger ).Log ("msg" , "failed to produce records" , "err" , err )
98151 t .failures .Inc ()
99152 }
153+ if t .cfg .DebugMetricsEnabled {
154+ t .produced .WithLabelValues (
155+ tenant ,
156+ strconv .FormatInt (int64 (partition ), 10 ),
157+ string (stream .SegmentationKey ),
158+ ).Add (float64 (stream .Stream .Size ()))
159+ }
100160}
0 commit comments