Skip to content

Commit 6f49f4f

Browse files
blind-oracle authored and kyleconroy committed
Asynchronous sendStatus(), pass WAL position to handler, fix plugin args etc (#2)
* Remove vendor folder
* don't drop the replication slot
* Fix pluginArgs
* relation: change to pointer, add Get(), fix sendStatus()
* pass wal position to handler
* send status messages async, fix example, other stuff
* Fix test
* revert timeouts
* Add Flush() function. By default send status msgs only with write walPos
* Add walRetain option
* Take manual flushes into account
* check for nil message
* make relation non-pointer, fix misc
* fix relation Get
* add relation.IsEmpty()
* change relation.Get
* Cosmetic changes
* Add connInfo to NewRelationSet(), fail on handler knob, cleanups for PR
1 parent 516819d commit 6f49f4f

File tree

5 files changed

+164
-77
lines changed

5 files changed

+164
-77
lines changed

examples/replicate.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ func main() {
1717
log.Fatal(err)
1818
}
1919

20-
set := pgoutput.NewRelationSet()
20+
set := pgoutput.NewRelationSet(nil)
2121

2222
dump := func(relation uint32, row []pgoutput.Tuple) error {
2323
values, err := set.Values(relation, row)
@@ -31,8 +31,7 @@ func main() {
3131
return nil
3232
}
3333

34-
handler := func(m pgoutput.Message) error {
35-
return fmt.Errorf("hey")
34+
handler := func(m pgoutput.Message, walPos uint64) error {
3635
switch v := m.(type) {
3736
case pgoutput.Relation:
3837
log.Printf("RELATION")
@@ -50,8 +49,8 @@ func main() {
5049
return nil
5150
}
5251

53-
sub := pgoutput.NewSubscription("sub1", "pub1")
54-
if err := sub.Start(ctx, conn, handler); err != nil {
52+
sub := pgoutput.NewSubscription(conn, "sub1", "pub1", 0, false)
53+
if err := sub.Start(ctx, 0, handler); err != nil {
5554
log.Fatal(err)
5655
}
5756
}

parse.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ type Relation struct {
129129
Columns []Column
130130
}
131131

132+
func (r Relation) IsEmpty() bool {
133+
return r.ID == 0 && r.Name == "" && r.Replica == 0 && len(r.Columns) == 0
134+
}
135+
132136
type Type struct {
133137
// ID of the data type
134138
ID uint32

parse_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ func GenerateLogicalReplicationFiles(t *testing.T) {
5151

5252
func TestParseWalData(t *testing.T) {
5353
files, _ := filepath.Glob("testdata/*")
54-
set := NewRelationSet()
54+
set := NewRelationSet(nil)
5555

5656
expected := map[int]struct {
5757
ID int32

sub.go

Lines changed: 135 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ package pgoutput
33
import (
44
"context"
55
"fmt"
6-
"log"
6+
"sync"
7+
"sync/atomic"
78
"time"
89

910
"github.com/jackc/pgx"
@@ -14,120 +15,190 @@ type Subscription struct {
1415
Publication string
1516
WaitTimeout time.Duration
1617
StatusTimeout time.Duration
17-
CopyData bool
18+
19+
conn *pgx.ReplicationConn
20+
maxWal uint64
21+
walRetain uint64
22+
walFlushed uint64
23+
24+
failOnHandler bool
25+
26+
// Mutex is used to prevent reading and writing to a connection at the same time
27+
sync.Mutex
1828
}
1929

20-
type Handler func(Message) error
30+
type Handler func(Message, uint64) error
2131

22-
func NewSubscription(name, publication string) *Subscription {
32+
func NewSubscription(conn *pgx.ReplicationConn, name, publication string, walRetain uint64, failOnHandler bool) *Subscription {
2333
return &Subscription{
2434
Name: name,
2535
Publication: publication,
26-
WaitTimeout: time.Second * 10,
27-
StatusTimeout: time.Second * 10,
28-
CopyData: true,
36+
WaitTimeout: 1 * time.Second,
37+
StatusTimeout: 10 * time.Second,
38+
39+
conn: conn,
40+
walRetain: walRetain,
41+
failOnHandler: failOnHandler,
2942
}
3043
}
3144

3245
func pluginArgs(version, publication string) string {
33-
return fmt.Sprintf(`("proto_version" '%s', "publication_names" '%s')`, version, publication)
46+
return fmt.Sprintf(`"proto_version" '%s', "publication_names" '%s'`, version, publication)
3447
}
3548

36-
func (s *Subscription) Start(ctx context.Context, conn *pgx.ReplicationConn, h Handler) error {
37-
// TODO: Struct Validation here
38-
_ = conn.DropReplicationSlot(s.Name)
39-
49+
// CreateSlot creates a replication slot if it doesn't exist
50+
func (s *Subscription) CreateSlot() (err error) {
4051
// If creating the replication slot fails with code 42710, this means
4152
// the replication slot already exists.
42-
err := conn.CreateReplicationSlot(s.Name, "pgoutput")
43-
if err != nil {
53+
if err = s.conn.CreateReplicationSlot(s.Name, "pgoutput"); err != nil {
4454
pgerr, ok := err.(pgx.PgError)
45-
if !ok {
46-
return fmt.Errorf("failed to create replication slot: %s", err)
47-
}
48-
if pgerr.Code != "42710" {
49-
return fmt.Errorf("failed to create replication slot: %s", err)
55+
if !ok || pgerr.Code != "42710" {
56+
return
5057
}
58+
59+
err = nil
60+
}
61+
62+
return
63+
}
64+
65+
func (s *Subscription) sendStatus(walWrite, walFlush uint64) error {
66+
if walFlush > walWrite {
67+
return fmt.Errorf("walWrite should be >= walFlush")
5168
}
5269

53-
// rows, err := conn.IdentifySystem()
54-
// if err != nil {
55-
// return err
56-
// }
70+
s.Lock()
71+
defer s.Unlock()
5772

58-
// var slotName, consitentPoint, snapshotName, outputPlugin string
59-
// if err := row.Scan(&slotName, &consitentPoint, &snapshotName, &outputPlugin); err != nil {
60-
// return err
61-
// }
73+
k, err := pgx.NewStandbyStatus(walFlush, walFlush, walWrite)
74+
if err != nil {
75+
return fmt.Errorf("error creating status: %s", err)
76+
}
6277

63-
// log.Printf("slotName: %s\n", slotName)
64-
// log.Printf("consitentPoint: %s\n", consitentPoint)
65-
// log.Printf("snapshotName: %s\n", snapshotName)
66-
// log.Printf("outputPlugin: %s\n", outputPlugin)
78+
if err = s.conn.SendStandbyStatus(k); err != nil {
79+
return err
80+
}
6781

68-
// Open a transaction on the server
69-
// SET TRANSACTION SNAPSHOT id
70-
// read all the data from the tables
82+
return nil
83+
}
7184

72-
err = conn.StartReplication(s.Name, 0, -1, pluginArgs("1", s.Publication))
85+
// Flush sends the status message to server indicating that we've fully applied all of the events until maxWal.
86+
// This allows PostgreSQL to purge its WAL logs.
87+
func (s *Subscription) Flush() error {
88+
wp := atomic.LoadUint64(&s.maxWal)
89+
err := s.sendStatus(wp, wp)
90+
if err == nil {
91+
atomic.StoreUint64(&s.walFlushed, wp)
92+
}
93+
94+
return err
95+
}
96+
97+
// Start replication and block until error or ctx is canceled
98+
func (s *Subscription) Start(ctx context.Context, startLSN uint64, h Handler) (err error) {
99+
err = s.conn.StartReplication(s.Name, startLSN, -1, pluginArgs("1", s.Publication))
73100
if err != nil {
74101
return fmt.Errorf("failed to start replication: %s", err)
75102
}
76103

77-
var maxWal uint64
104+
s.maxWal = startLSN
78105

79106
sendStatus := func() error {
80-
k, err := pgx.NewStandbyStatus(maxWal)
81-
if err != nil {
82-
return fmt.Errorf("error creating standby status: %s", err)
107+
walPos := atomic.LoadUint64(&s.maxWal)
108+
walLastFlushed := atomic.LoadUint64(&s.walFlushed)
109+
110+
// Confirm only walRetain bytes in past
111+
// If walRetain is zero - will confirm current walPos as flushed
112+
walFlush := walPos - s.walRetain
113+
114+
if walLastFlushed > walFlush {
115+
// If there was a manual flush - report its position until we're past it
116+
walFlush = walLastFlushed
117+
} else if walFlush < 0 {
118+
// If we have less than walRetain bytes - just report zero
119+
walFlush = 0
83120
}
84-
if err := conn.SendStandbyStatus(k); err != nil {
85-
return fmt.Errorf("failed to send standy status: %s", err)
86-
}
87-
return nil
121+
122+
return s.sendStatus(walPos, walFlush)
88123
}
89124

90-
tick := time.NewTicker(s.StatusTimeout).C
125+
go func() {
126+
tick := time.NewTicker(s.StatusTimeout)
127+
defer tick.Stop()
128+
129+
for {
130+
select {
131+
case <-tick.C:
132+
if err = sendStatus(); err != nil {
133+
return
134+
}
135+
136+
case <-ctx.Done():
137+
return
138+
}
139+
}
140+
}()
141+
91142
for {
92143
select {
93-
case <-tick:
94-
log.Println("pub status")
95-
if maxWal == 0 {
96-
continue
97-
}
98-
if err := sendStatus(); err != nil {
99-
return err
144+
case <-ctx.Done():
145+
// Send final status and exit
146+
if err = sendStatus(); err != nil {
147+
return fmt.Errorf("Unable to send final status: %s", err)
100148
}
149+
150+
return
151+
101152
default:
102153
var message *pgx.ReplicationMessage
103154
wctx, cancel := context.WithTimeout(ctx, s.WaitTimeout)
104-
message, err = conn.WaitForReplicationMessage(wctx)
155+
s.Lock()
156+
message, err = s.conn.WaitForReplicationMessage(wctx)
157+
s.Unlock()
105158
cancel()
159+
106160
if err == context.DeadlineExceeded {
107161
continue
108-
}
109-
if err != nil {
162+
} else if err == context.Canceled {
163+
return
164+
} else if err != nil {
110165
return fmt.Errorf("replication failed: %s", err)
111166
}
167+
168+
if message == nil {
169+
return fmt.Errorf("replication failed: nil message received, should not happen")
170+
}
171+
112172
if message.WalMessage != nil {
113-
if message.WalMessage.WalStart > maxWal {
114-
maxWal = message.WalMessage.WalStart
173+
var logmsg Message
174+
walStart := message.WalMessage.WalStart
175+
176+
// Skip stuff that's in the past
177+
if walStart > 0 && walStart <= startLSN {
178+
continue
115179
}
116-
logmsg, err := Parse(message.WalMessage.WalData)
180+
181+
if walStart > atomic.LoadUint64(&s.maxWal) {
182+
atomic.StoreUint64(&s.maxWal, walStart)
183+
}
184+
185+
logmsg, err = Parse(message.WalMessage.WalData)
117186
if err != nil {
118187
return fmt.Errorf("invalid pgoutput message: %s", err)
119188
}
120-
if err := h(logmsg); err != nil {
121-
return fmt.Errorf("error handling waldata: %s", err)
189+
190+
// Handler errors are ignored unless failOnHandler is set
191+
if err = h(logmsg, walStart); err != nil && s.failOnHandler {
192+
return
122193
}
123-
}
124-
if message.ServerHeartbeat != nil {
194+
} else if message.ServerHeartbeat != nil {
125195
if message.ServerHeartbeat.ReplyRequested == 1 {
126-
log.Println("server wants a reply")
127-
if err := sendStatus(); err != nil {
128-
return err
196+
if err = sendStatus(); err != nil {
197+
return
129198
}
130199
}
200+
} else {
201+
return fmt.Errorf("No WalMessage/ServerHeartbeat defined in packet, should not happen")
131202
}
132203
}
133204
}

values.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,34 +7,47 @@ import (
77
)
88

99
type RelationSet struct {
10-
// TODO: Add mutex
10+
// Mutex probably will be redundant as receiving
11+
// a replication stream is currently strictly single-threaded
1112
relations map[uint32]Relation
13+
connInfo *pgtype.ConnInfo
1214
}
1315

14-
func NewRelationSet() *RelationSet {
15-
return &RelationSet{relations: map[uint32]Relation{}}
16+
// NewRelationSet creates a new relation set.
17+
// Optionally ConnInfo can be provided, however currently we need some changes to pgx to get it out
18+
// from ReplicationConn.
19+
func NewRelationSet(ci *pgtype.ConnInfo) *RelationSet {
20+
return &RelationSet{map[uint32]Relation{}, ci}
1621
}
1722

1823
func (rs *RelationSet) Add(r Relation) {
1924
rs.relations[r.ID] = r
2025
}
2126

27+
func (rs *RelationSet) Get(ID uint32) (r Relation, ok bool) {
28+
r, ok = rs.relations[ID]
29+
return
30+
}
31+
2232
func (rs *RelationSet) Values(id uint32, row []Tuple) (map[string]pgtype.Value, error) {
2333
values := map[string]pgtype.Value{}
24-
rel, ok := rs.relations[id]
34+
rel, ok := rs.Get(id)
2535
if !ok {
2636
return values, fmt.Errorf("no relation for %d", id)
2737
}
38+
2839
// assert same number of row and columns
2940
for i, tuple := range row {
3041
col := rel.Columns[i]
3142
decoder := col.Decoder()
32-
// TODO: Pass in connection?
33-
if err := decoder.DecodeText(nil, tuple.Value); err != nil {
34-
return values, fmt.Errorf("error decoding tuple %d: %s", i, err)
43+
44+
if err := decoder.DecodeText(rs.connInfo, tuple.Value); err != nil {
45+
return nil, fmt.Errorf("error decoding tuple %d: %s", i, err)
3546
}
47+
3648
values[col.Name] = decoder
3749
}
50+
3851
return values, nil
3952
}
4053

0 commit comments

Comments
 (0)