Skip to content

Commit 59ac229

Browse files
rjl493456442nuoomnoy02MariusVanDerWijdenholiman
authored
core/state/snapshot: detect and clean up dangling storage snapshot in generation (#24811)
* core/state/snapshot: check dangling storages when generating snapshot * core/state/snapshot: polish * core/state/snapshot: wipe the last part of the dangling storages * core/state/snapshot: fix and add tests * core/state/snapshot: fix comment * README: remove mentions of fast sync (#24656) Co-authored-by: Marius van der Wijden <[email protected]> * core, cmd: expose dangling storage detector for wider usage * core/state/snapshot: rename variable * core, ethdb: use global iterators for snapshot generation * core/state/snapshot: polish * cmd, core/state/snapshot: polish * core/state/snapshot: polish * Update core/state/snapshot/generate.go Co-authored-by: Martin Holst Swende <[email protected]> * ethdb: extend db test suite and fix memorydb iterator * ethdb/dbtest: rollback changes * ethdb/memorydb: simplify iteration * core/state/snapshot: update dangling counter * core/state/snapshot: release iterators * core/state/snapshot: update metrics * core/state/snapshot: update time metrics * metrics/influxdb: temp solution to present counter meaningfully, remove it * add debug log, revert later * core/state/snapshot: fix iterator panic * all: customized snapshot iterator for backward iteration * core, ethdb: polish * core/state/snapshot: remove debug log * core/state/snapshot: address comments from peter * core/state/snapshot: reopen the iterator at the next position * ethdb, core/state/snapshot: address comment from peter * core/state/snapshot: reopen exhausted iterators Co-authored-by: Tbnoapi <[email protected]> Co-authored-by: Marius van der Wijden <[email protected]> Co-authored-by: Martin Holst Swende <[email protected]>
1 parent 2b0d0ce commit 59ac229

13 files changed

+965
-509
lines changed

cmd/geth/snapshot.go

+2-61
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"bytes"
2121
"encoding/json"
2222
"errors"
23-
"fmt"
2423
"os"
2524
"time"
2625

@@ -32,7 +31,6 @@ import (
3231
"github.com/ethereum/go-ethereum/core/state/snapshot"
3332
"github.com/ethereum/go-ethereum/core/types"
3433
"github.com/ethereum/go-ethereum/crypto"
35-
"github.com/ethereum/go-ethereum/ethdb"
3634
"github.com/ethereum/go-ethereum/log"
3735
"github.com/ethereum/go-ethereum/rlp"
3836
"github.com/ethereum/go-ethereum/trie"
@@ -223,15 +221,7 @@ func verifyState(ctx *cli.Context) error {
223221
return err
224222
}
225223
log.Info("Verified the state", "root", root)
226-
if err := checkDanglingDiskStorage(chaindb); err != nil {
227-
log.Error("Dangling snap disk-storage check failed", "root", root, "err", err)
228-
return err
229-
}
230-
if err := checkDanglingMemStorage(chaindb); err != nil {
231-
log.Error("Dangling snap mem-storage check failed", "root", root, "err", err)
232-
return err
233-
}
234-
return nil
224+
return snapshot.CheckDanglingStorage(chaindb)
235225
}
236226

237227
// checkDanglingStorage iterates the snap storage data, and verifies that all
@@ -240,56 +230,7 @@ func checkDanglingStorage(ctx *cli.Context) error {
240230
stack, _ := makeConfigNode(ctx)
241231
defer stack.Close()
242232

243-
chaindb := utils.MakeChainDatabase(ctx, stack, true)
244-
if err := checkDanglingDiskStorage(chaindb); err != nil {
245-
return err
246-
}
247-
return checkDanglingMemStorage(chaindb)
248-
249-
}
250-
251-
// checkDanglingDiskStorage checks if there is any 'dangling' storage data in the
252-
// disk-backed snapshot layer.
253-
func checkDanglingDiskStorage(chaindb ethdb.Database) error {
254-
log.Info("Checking dangling snapshot disk storage")
255-
var (
256-
lastReport = time.Now()
257-
start = time.Now()
258-
lastKey []byte
259-
it = rawdb.NewKeyLengthIterator(chaindb.NewIterator(rawdb.SnapshotStoragePrefix, nil), 1+2*common.HashLength)
260-
)
261-
defer it.Release()
262-
for it.Next() {
263-
k := it.Key()
264-
accKey := k[1:33]
265-
if bytes.Equal(accKey, lastKey) {
266-
// No need to look up for every slot
267-
continue
268-
}
269-
lastKey = common.CopyBytes(accKey)
270-
if time.Since(lastReport) > time.Second*8 {
271-
log.Info("Iterating snap storage", "at", fmt.Sprintf("%#x", accKey), "elapsed", common.PrettyDuration(time.Since(start)))
272-
lastReport = time.Now()
273-
}
274-
if data := rawdb.ReadAccountSnapshot(chaindb, common.BytesToHash(accKey)); len(data) == 0 {
275-
log.Error("Dangling storage - missing account", "account", fmt.Sprintf("%#x", accKey), "storagekey", fmt.Sprintf("%#x", k))
276-
return fmt.Errorf("dangling snapshot storage account %#x", accKey)
277-
}
278-
}
279-
log.Info("Verified the snapshot disk storage", "time", common.PrettyDuration(time.Since(start)), "err", it.Error())
280-
return nil
281-
}
282-
283-
// checkDanglingMemStorage checks if there is any 'dangling' storage in the journalled
284-
// snapshot difflayers.
285-
func checkDanglingMemStorage(chaindb ethdb.Database) error {
286-
start := time.Now()
287-
log.Info("Checking dangling snapshot difflayer journalled storage")
288-
if err := snapshot.CheckJournalStorage(chaindb); err != nil {
289-
return err
290-
}
291-
log.Info("Verified the snapshot journalled storage", "time", common.PrettyDuration(time.Since(start)))
292-
return nil
233+
return snapshot.CheckDanglingStorage(utils.MakeChainDatabase(ctx, stack, true))
293234
}
294235

295236
// traverseState is a helper function used for pruning verification.

core/state/snapshot/context.go

+241
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
// Copyright 2022 The go-ethereum Authors
2+
// This file is part of the go-ethereum library.
3+
//
4+
// The go-ethereum library is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU Lesser General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
//
9+
// The go-ethereum library is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU Lesser General Public License for more details.
13+
//
14+
// You should have received a copy of the GNU Lesser General Public License
15+
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16+
17+
package snapshot
18+
19+
import (
20+
"bytes"
21+
"encoding/binary"
22+
"errors"
23+
"time"
24+
25+
"github.com/ethereum/go-ethereum/common"
26+
"github.com/ethereum/go-ethereum/common/math"
27+
"github.com/ethereum/go-ethereum/core/rawdb"
28+
"github.com/ethereum/go-ethereum/ethdb"
29+
"github.com/ethereum/go-ethereum/ethdb/memorydb"
30+
"github.com/ethereum/go-ethereum/log"
31+
)
32+
33+
const (
34+
snapAccount = "account" // Identifier of account snapshot generation
35+
snapStorage = "storage" // Identifier of storage snapshot generation
36+
)
37+
38+
// generatorStats is a collection of statistics gathered by the snapshot generator
39+
// for logging purposes.
40+
type generatorStats struct {
41+
origin uint64 // Origin prefix where generation started
42+
start time.Time // Timestamp when generation started
43+
accounts uint64 // Number of accounts indexed(generated or recovered)
44+
slots uint64 // Number of storage slots indexed(generated or recovered)
45+
dangling uint64 // Number of dangling storage slots
46+
storage common.StorageSize // Total account and storage slot size(generation or recovery)
47+
}
48+
49+
// Log creates an contextual log with the given message and the context pulled
50+
// from the internally maintained statistics.
51+
func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) {
52+
var ctx []interface{}
53+
if root != (common.Hash{}) {
54+
ctx = append(ctx, []interface{}{"root", root}...)
55+
}
56+
// Figure out whether we're after or within an account
57+
switch len(marker) {
58+
case common.HashLength:
59+
ctx = append(ctx, []interface{}{"at", common.BytesToHash(marker)}...)
60+
case 2 * common.HashLength:
61+
ctx = append(ctx, []interface{}{
62+
"in", common.BytesToHash(marker[:common.HashLength]),
63+
"at", common.BytesToHash(marker[common.HashLength:]),
64+
}...)
65+
}
66+
// Add the usual measurements
67+
ctx = append(ctx, []interface{}{
68+
"accounts", gs.accounts,
69+
"slots", gs.slots,
70+
"storage", gs.storage,
71+
"dangling", gs.dangling,
72+
"elapsed", common.PrettyDuration(time.Since(gs.start)),
73+
}...)
74+
// Calculate the estimated indexing time based on current stats
75+
if len(marker) > 0 {
76+
if done := binary.BigEndian.Uint64(marker[:8]) - gs.origin; done > 0 {
77+
left := math.MaxUint64 - binary.BigEndian.Uint64(marker[:8])
78+
79+
speed := done/uint64(time.Since(gs.start)/time.Millisecond+1) + 1 // +1s to avoid division by zero
80+
ctx = append(ctx, []interface{}{
81+
"eta", common.PrettyDuration(time.Duration(left/speed) * time.Millisecond),
82+
}...)
83+
}
84+
}
85+
log.Info(msg, ctx...)
86+
}
87+
88+
// generatorContext carries a few global values to be shared by all generation functions.
89+
type generatorContext struct {
90+
stats *generatorStats // Generation statistic collection
91+
db ethdb.KeyValueStore // Key-value store containing the snapshot data
92+
account *holdableIterator // Iterator of account snapshot data
93+
storage *holdableIterator // Iterator of storage snapshot data
94+
batch ethdb.Batch // Database batch for writing batch data atomically
95+
logged time.Time // The timestamp when last generation progress was displayed
96+
}
97+
98+
// newGeneratorContext initializes the context for generation.
99+
func newGeneratorContext(stats *generatorStats, db ethdb.KeyValueStore, accMarker []byte, storageMarker []byte) *generatorContext {
100+
ctx := &generatorContext{
101+
stats: stats,
102+
db: db,
103+
batch: db.NewBatch(),
104+
logged: time.Now(),
105+
}
106+
ctx.openIterator(snapAccount, accMarker)
107+
ctx.openIterator(snapStorage, storageMarker)
108+
return ctx
109+
}
110+
111+
// openIterator constructs global account and storage snapshot iterators
112+
// at the interrupted position. These iterators should be reopened from time
113+
// to time to avoid blocking leveldb compaction for a long time.
114+
func (ctx *generatorContext) openIterator(kind string, start []byte) {
115+
if kind == snapAccount {
116+
iter := ctx.db.NewIterator(rawdb.SnapshotAccountPrefix, start)
117+
ctx.account = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, 1+common.HashLength))
118+
return
119+
}
120+
iter := ctx.db.NewIterator(rawdb.SnapshotStoragePrefix, start)
121+
ctx.storage = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, 1+2*common.HashLength))
122+
}
123+
124+
// reopenIterator releases the specified snapshot iterator and re-open it
125+
// in the next position. It's aimed for not blocking leveldb compaction.
126+
func (ctx *generatorContext) reopenIterator(kind string) {
127+
// Shift iterator one more step, so that we can reopen
128+
// the iterator at the right position.
129+
var iter = ctx.account
130+
if kind == snapStorage {
131+
iter = ctx.storage
132+
}
133+
hasNext := iter.Next()
134+
if !hasNext {
135+
// Iterator exhausted, release forever and create an already exhausted virtual iterator
136+
iter.Release()
137+
if kind == snapAccount {
138+
ctx.account = newHoldableIterator(memorydb.New().NewIterator(nil, nil))
139+
return
140+
}
141+
ctx.storage = newHoldableIterator(memorydb.New().NewIterator(nil, nil))
142+
return
143+
}
144+
next := iter.Key()
145+
iter.Release()
146+
ctx.openIterator(kind, next[1:])
147+
}
148+
149+
// close releases all the held resources.
150+
func (ctx *generatorContext) close() {
151+
ctx.account.Release()
152+
ctx.storage.Release()
153+
}
154+
155+
// iterator returns the corresponding iterator specified by the kind.
156+
func (ctx *generatorContext) iterator(kind string) *holdableIterator {
157+
if kind == snapAccount {
158+
return ctx.account
159+
}
160+
return ctx.storage
161+
}
162+
163+
// removeStorageBefore deletes all storage entries which are located before
164+
// the specified account. When the iterator touches the storage entry which
165+
// is located in or outside the given account, it stops and holds the current
166+
// iterated element locally.
167+
func (ctx *generatorContext) removeStorageBefore(account common.Hash) {
168+
var (
169+
count uint64
170+
start = time.Now()
171+
iter = ctx.storage
172+
)
173+
for iter.Next() {
174+
key := iter.Key()
175+
if bytes.Compare(key[1:1+common.HashLength], account.Bytes()) >= 0 {
176+
iter.Hold()
177+
break
178+
}
179+
count++
180+
ctx.batch.Delete(key)
181+
if ctx.batch.ValueSize() > ethdb.IdealBatchSize {
182+
ctx.batch.Write()
183+
ctx.batch.Reset()
184+
}
185+
}
186+
ctx.stats.dangling += count
187+
snapStorageCleanCounter.Inc(time.Since(start).Nanoseconds())
188+
}
189+
190+
// removeStorageAt deletes all storage entries which are located in the specified
191+
// account. When the iterator touches the storage entry which is outside the given
192+
// account, it stops and holds the current iterated element locally. An error will
193+
// be returned if the initial position of iterator is not in the given account.
194+
func (ctx *generatorContext) removeStorageAt(account common.Hash) error {
195+
var (
196+
count int64
197+
start = time.Now()
198+
iter = ctx.storage
199+
)
200+
for iter.Next() {
201+
key := iter.Key()
202+
cmp := bytes.Compare(key[1:1+common.HashLength], account.Bytes())
203+
if cmp < 0 {
204+
return errors.New("invalid iterator position")
205+
}
206+
if cmp > 0 {
207+
iter.Hold()
208+
break
209+
}
210+
count++
211+
ctx.batch.Delete(key)
212+
if ctx.batch.ValueSize() > ethdb.IdealBatchSize {
213+
ctx.batch.Write()
214+
ctx.batch.Reset()
215+
}
216+
}
217+
snapWipedStorageMeter.Mark(count)
218+
snapStorageCleanCounter.Inc(time.Since(start).Nanoseconds())
219+
return nil
220+
}
221+
222+
// removeStorageLeft deletes all storage entries which are located after
223+
// the current iterator position.
224+
func (ctx *generatorContext) removeStorageLeft() {
225+
var (
226+
count uint64
227+
start = time.Now()
228+
iter = ctx.storage
229+
)
230+
for iter.Next() {
231+
count++
232+
ctx.batch.Delete(iter.Key())
233+
if ctx.batch.ValueSize() > ethdb.IdealBatchSize {
234+
ctx.batch.Write()
235+
ctx.batch.Reset()
236+
}
237+
}
238+
ctx.stats.dangling += count
239+
snapDanglingStorageMeter.Mark(int64(count))
240+
snapStorageCleanCounter.Inc(time.Since(start).Nanoseconds())
241+
}

0 commit comments

Comments
 (0)