diff --git a/Makefile b/Makefile index 99e1b7f10c..de78093e02 100644 --- a/Makefile +++ b/Makefile @@ -397,8 +397,19 @@ evmstate_test: | build deps evmstate txparse: | build deps $(ENV_SCRIPT) nim c $(NIM_PARAMS) "tools/txparse/$@.nim" +# build syncer debugging and analysis tools +SYNCER_TOOLS_DIR := tools/syncer +SYNCER_TOOLS := $(foreach name,trace inspect replay,syncer_test_client_$(name)) +.PHONY: syncer-tools syncer-tools-clean $(SYNCER_TOOLS) +syncer-tools: $(SYNCER_TOOLS) +syncer-tools-clean: + rm -f $(foreach exe,$(SYNCER_TOOLS),build/$(exe)) +$(SYNCER_TOOLS): | build deps rocksdb + echo -e $(BUILD_MSG) "build/$@" + $(ENV_SCRIPT) nim c $(NIM_PARAMS) -o:build/$@ "$(SYNCER_TOOLS_DIR)/$@.nim" + # usual cleaning -clean: | clean-common +clean: | clean-common syncer-tools-clean rm -rf build/{nimbus,nimbus_execution_client,nimbus_portal_client,fluffy,portal_bridge,libverifproxy,nimbus_verified_proxy,$(TOOLS_CSV),$(PORTAL_TOOLS_CSV),all_tests,test_kvstore_rocksdb,test_rpc,all_portal_tests,all_history_network_custom_chain_tests,test_portal_testnet,utp_test_app,utp_test,*.dSYM} rm -rf tools/t8n/{t8n,t8n_test} rm -rf tools/evmstate/{evmstate,evmstate_test} diff --git a/execution_chain/sync/beacon.nim b/execution_chain/sync/beacon.nim index 00bf7cf08f..d679985c8f 100644 --- a/execution_chain/sync/beacon.nim +++ b/execution_chain/sync/beacon.nim @@ -15,8 +15,10 @@ import pkg/stew/[interval_set, sorted_set], ../core/chain, ../networking/p2p, - ./beacon/worker/headers/headers_target, ./beacon/[beacon_desc, worker], + ./beacon/worker/blocks/[blocks_fetch, blocks_import], + ./beacon/worker/headers/[headers_fetch, headers_target], + ./beacon/worker/update, ./[sync_sched, wire_protocol] export @@ -25,33 +27,62 @@ export logScope: topics = "beacon sync" +# ------------------------------------------------------------------------------ +# Interceptable handlers +# ------------------------------------------------------------------------------ + +proc schedDaemonCB( + ctx: 
BeaconCtxRef; + ): Future[Duration] + {.async: (raises: []).} = + return worker.runDaemon(ctx, "RunDaemon") # async/template + +proc schedStartCB(buddy: BeaconBuddyRef): bool = + return worker.start(buddy, "RunStart") + +proc schedStopCB(buddy: BeaconBuddyRef) = + worker.stop(buddy, "RunStop") + +proc schedPoolCB(buddy: BeaconBuddyRef; last: bool; laps: int): bool = + return worker.runPool(buddy, last, laps, "RunPool") + +proc schedPeerCB( + buddy: BeaconBuddyRef; + ): Future[Duration] + {.async: (raises: []).} = + return worker.runPeer(buddy, "RunPeer") # async/template + +proc noOpFn(buddy: BeaconBuddyRef) = discard +proc noOpEx(self: BeaconHandlersSyncRef) = discard + # ------------------------------------------------------------------------------ # Virtual methods/interface, `mixin` functions # ------------------------------------------------------------------------------ proc runSetup(ctx: BeaconCtxRef): bool = - worker.setup(ctx, "RunSetup") + return worker.setup(ctx, "RunSetup") proc runRelease(ctx: BeaconCtxRef) = worker.release(ctx, "RunRelease") -proc runDaemon(ctx: BeaconCtxRef): Future[Duration] {.async: (raises: []).} = - return worker.runDaemon(ctx, "RunDaemon") - proc runTicker(ctx: BeaconCtxRef) = worker.runTicker(ctx, "RunTicker") + +proc runDaemon(ctx: BeaconCtxRef): Future[Duration] {.async: (raises: []).} = + return await ctx.handler.schedDaemon(ctx) + proc runStart(buddy: BeaconBuddyRef): bool = - worker.start(buddy, "RunStart") + return buddy.ctx.handler.schedStart(buddy) proc runStop(buddy: BeaconBuddyRef) = - worker.stop(buddy, "RunStop") + buddy.ctx.handler.schedStop(buddy) proc runPool(buddy: BeaconBuddyRef; last: bool; laps: int): bool = - worker.runPool(buddy, last, laps, "RunPool") + return buddy.ctx.handler.schedPool(buddy, last, laps) proc runPeer(buddy: BeaconBuddyRef): Future[Duration] {.async: (raises: []).} = - return worker.runPeer(buddy, "RunPeer") + return await buddy.ctx.handler.schedPeer(buddy) # 
------------------------------------------------------------------------------ # Public functions @@ -83,6 +114,25 @@ proc config*( desc.initSync(ethNode, maxPeers) desc.ctx.pool.chain = chain + # Set up handlers so they can be overlayed + desc.ctx.pool.handlers = BeaconHandlersSyncRef( + version: 0, + activate: updateActivateCB, + suspend: updateSuspendCB, + schedDaemon: schedDaemonCB, + schedStart: schedStartCB, + schedStop: schedStopCB, + schedPool: schedPoolCB, + schedPeer: schedPeerCB, + getBlockHeaders: getBlockHeadersCB, + syncBlockHeaders: noOpFn, + getBlockBodies: getBlockBodiesCB, + syncBlockBodies: noOpFn, + importBlock: importBlockCB, + syncImportBlock: noOpFn, + startSync: noOpEx, + stopSync: noOpEx) + if not desc.lazyConfigHook.isNil: desc.lazyConfigHook(desc) desc.lazyConfigHook = nil @@ -99,10 +149,16 @@ proc configTarget*(desc: BeaconSyncRef; hex: string; isFinal: bool): bool = proc start*(desc: BeaconSyncRef): bool = doAssert not desc.ctx.isNil - desc.startSync() + if desc.startSync(): + let w = BeaconHandlersSyncRef(desc.ctx.pool.handlers) + w.startSync(w) + return true + # false proc stop*(desc: BeaconSyncRef) {.async.} = doAssert not desc.ctx.isNil + let w = BeaconHandlersSyncRef(desc.ctx.pool.handlers) + w.stopSync(w) await desc.stopSync() # ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/beacon_desc.nim b/execution_chain/sync/beacon/beacon_desc.nim index d453c73485..1a98eab957 100644 --- a/execution_chain/sync/beacon/beacon_desc.nim +++ b/execution_chain/sync/beacon/beacon_desc.nim @@ -25,4 +25,10 @@ type ## Instance descriptor, extends scheduler object lazyConfigHook*: BeaconSyncConfigHook + BeaconHandlersSyncRef* = ref object of BeaconHandlersRef + ## Add start/stop helpers to function list. By default, this functiona + ## are no-ops. 
+ startSync*: proc(self: BeaconHandlersSyncRef) {.gcsafe, raises: [].} + stopSync*: proc(self: BeaconHandlersSyncRef) {.gcsafe, raises: [].} + # End diff --git a/execution_chain/sync/beacon/worker/blocks/blocks_blocks.nim b/execution_chain/sync/beacon/worker/blocks/blocks_blocks.nim index b1508b5fe4..b795f31a05 100644 --- a/execution_chain/sync/beacon/worker/blocks/blocks_blocks.nim +++ b/execution_chain/sync/beacon/worker/blocks/blocks_blocks.nim @@ -17,12 +17,27 @@ import ../../../../networking/p2p, ../../../wire_protocol/types, ../[update, worker_desc], - ./[blocks_fetch, blocks_helpers, blocks_import, blocks_unproc] + ./[blocks_fetch, blocks_helpers, blocks_unproc] # ------------------------------------------------------------------------------ # Private helpers # ------------------------------------------------------------------------------ +template importBlock( + buddy: BeaconBuddyRef; + blk: EthBlock; + effPeerID: Hash; + ): Result[Duration,BeaconError] = + ## Async/template + ## + ## Wrapper around `importBlock()` handler + ## + let + ctx = buddy.ctx + rc = await ctx.handler.importBlock(buddy, blk, effPeerID) + ctx.handler.syncImportBlock(buddy) # debugging, trace, replay + rc + proc getNthHash(ctx: BeaconCtxRef; blocks: seq[EthBlock]; n: int): Hash32 = ctx.hdrCache.getHash(blocks[n].header.number).valueOr: return zeroHash32 @@ -201,7 +216,7 @@ template blocksImport*( for n in 0 ..< blocks.len: let nBn = blocks[n].header.number - discard (await buddy.importBlock(blocks[n], peerID)).valueOr: + buddy.importBlock(blocks[n], peerID).isOkOr: if error.excp != ECancelledError: isError = true diff --git a/execution_chain/sync/beacon/worker/blocks/blocks_fetch.nim b/execution_chain/sync/beacon/worker/blocks/blocks_fetch.nim index 9d2f3ac49d..0e75599fb7 100644 --- a/execution_chain/sync/beacon/worker/blocks/blocks_fetch.nim +++ b/execution_chain/sync/beacon/worker/blocks/blocks_fetch.nim @@ -18,11 +18,30 @@ import ../worker_desc, ./blocks_helpers +logScope: + topics 
= "beacon sync" + +# ------------------------------------------------------------------------------ +# Private helper +# ----------------------------------------------------------------------------- + +template getBlockBodies( + buddy: BeaconBuddyRef; + req: BlockBodiesRequest; + ): Result[FetchBodiesData,BeaconError] = + ## Async/template + ## + ## Wrapper around `getBlockBodies()` handler + ## + let rc = await buddy.ctx.handler.getBlockBodies(buddy, req) + buddy.ctx.handler.syncBlockBodies(buddy) # debugging, sync, replay + rc + # ------------------------------------------------------------------------------ -# Private helpers +# Public handler # ----------------------------------------------------------------------------- -proc getBlockBodies( +proc getBlockBodiesCB*( buddy: BeaconBuddyRef; req: BlockBodiesRequest; ): Future[Result[FetchBodiesData,BeaconError]] @@ -70,7 +89,7 @@ template fetchBodies*( trace trEthSendSendingGetBlockBodies, peer, nReq, bdyErrors=buddy.bdyErrors - let rc = await buddy.getBlockBodies(request) + let rc = buddy.getBlockBodies(request) var elapsed: Duration if rc.isOk: elapsed = rc.value.elapsed diff --git a/execution_chain/sync/beacon/worker/blocks/blocks_import.nim b/execution_chain/sync/beacon/worker/blocks/blocks_import.nim index 9b8d072a01..2e170bca1c 100644 --- a/execution_chain/sync/beacon/worker/blocks/blocks_import.nim +++ b/execution_chain/sync/beacon/worker/blocks/blocks_import.nim @@ -16,11 +16,14 @@ import ../../../wire_protocol, ../worker_desc +logScope: + topics = "beacon sync" + # ------------------------------------------------------------------------------ -# Public function +# Public handler # ------------------------------------------------------------------------------ -proc importBlock*( +proc importBlockCB*( buddy: BeaconBuddyRef; blk: EthBlock; effPeerID: Hash; diff --git a/execution_chain/sync/beacon/worker/headers/headers_fetch.nim b/execution_chain/sync/beacon/worker/headers/headers_fetch.nim index 
3d48732bdb..62834a0801 100644 --- a/execution_chain/sync/beacon/worker/headers/headers_fetch.nim +++ b/execution_chain/sync/beacon/worker/headers/headers_fetch.nim @@ -18,11 +18,30 @@ import ../worker_desc, ./headers_helpers +logScope: + topics = "beacon sync" + # ------------------------------------------------------------------------------ # Private helpers +# ----------------------------------------------------------------------------- + +template getBlockHeaders( + buddy: BeaconBuddyRef; + req: BlockHeadersRequest; + ): Result[FetchHeadersData,BeaconError] = + ## Async/template + ## + ## Wrapper around `getBlockHeaders()` handler + ## + let rc = await buddy.ctx.handler.getBlockHeaders(buddy, req) + buddy.ctx.handler.syncBlockHeaders(buddy) # debugging, sync, replay + rc + +# ------------------------------------------------------------------------------ +# Public handler # ------------------------------------------------------------------------------ -proc getBlockHeaders( +proc getBlockHeadersCB*( buddy: BeaconBuddyRef; req: BlockHeadersRequest; ): Future[Result[FetchHeadersData,BeaconError]] @@ -88,7 +107,7 @@ template fetchHeadersReversed*( trace trEthSendSendingGetBlockHeaders & " reverse", peer, req=ivReq, nReq=req.maxResults, hash=topHash.toStr, hdrErrors=buddy.hdrErrors - let rc = await buddy.getBlockHeaders(req) + let rc = buddy.getBlockHeaders(req) var elapsed: Duration if rc.isOk: elapsed = rc.value.elapsed diff --git a/execution_chain/sync/beacon/worker/start_stop.nim b/execution_chain/sync/beacon/worker/start_stop.nim index 6b1d3c70bd..bf8f01ec0e 100644 --- a/execution_chain/sync/beacon/worker/start_stop.nim +++ b/execution_chain/sync/beacon/worker/start_stop.nim @@ -14,7 +14,7 @@ import pkg/[chronicles, chronos, eth/common, metrics], ../../../networking/p2p, ../../wire_protocol, - ./[blocks, headers, update, worker_desc] + ./[blocks, headers, worker_desc] type SyncStateData = tuple @@ -59,8 +59,8 @@ proc setupServices*(ctx: BeaconCtxRef; info: 
static[string]) = # Set up the notifier informing when a new syncer session has started. ctx.hdrCache.start proc() = - # Activates the syncer. Work will be picked up by peers when available. - ctx.updateActivateSyncer() + # This directive captures `ctx` for calling the activation handler. + ctx.handler.activate(ctx) # Provide progress info call back handler ctx.pool.chain.com.beaconSyncerProgress = proc(): SyncStateData = diff --git a/execution_chain/sync/beacon/worker/update.nim b/execution_chain/sync/beacon/worker/update.nim index 2bb42a0460..343796cf88 100644 --- a/execution_chain/sync/beacon/worker/update.nim +++ b/execution_chain/sync/beacon/worker/update.nim @@ -30,23 +30,6 @@ declareGauge nec_sync_head, "" & # Private functions, state handler helpers # ------------------------------------------------------------------------------ -proc updateSuspendSyncer(ctx: BeaconCtxRef) = - ## Clean up sync target buckets, stop syncer activity, and and get ready - ## for awaiting a new request from the `CL`. - ## - ctx.hdrCache.clear() - - ctx.pool.failedPeers.clear() - ctx.pool.seenData = false - - ctx.hibernate = true - - metrics.set(nec_sync_last_block_imported, 0) - metrics.set(nec_sync_head, 0) - - info "Suspending syncer", base=ctx.chain.baseNumber.bnStr, - head=ctx.chain.latestNumber.bnStr, nSyncPeers=ctx.pool.nBuddies - proc commitCollectHeaders(ctx: BeaconCtxRef; info: static[string]): bool = ## Link header chain into `FC` module. Gets ready for block import. 
## @@ -227,7 +210,7 @@ proc updateSyncState*(ctx: BeaconCtxRef; info: static[string]) = # Final sync scrum layout reached or inconsistent/impossible state if newState == idle: - ctx.updateSuspendSyncer() + ctx.handler.suspend(ctx) proc updateLastBlockImported*(ctx: BeaconCtxRef; bn: BlockNumber) = @@ -238,7 +221,7 @@ proc updateLastBlockImported*(ctx: BeaconCtxRef; bn: BlockNumber) = # Public functions, call-back handler ready # ------------------------------------------------------------------------------ -proc updateActivateSyncer*(ctx: BeaconCtxRef) = +proc updateActivateCB*(ctx: BeaconCtxRef) = ## If in hibernate mode, accept a cache session and activate syncer ## if ctx.hibernate and # only in idle mode @@ -277,6 +260,24 @@ proc updateActivateSyncer*(ctx: BeaconCtxRef) = head=ctx.chain.latestNumber.bnStr, state=ctx.hdrCache.state, initTarget=ctx.pool.initTarget.isSome(), nSyncPeers=ctx.pool.nBuddies + +proc updateSuspendCB*(ctx: BeaconCtxRef) = + ## Clean up sync target buckets, stop syncer activity, and and get ready + ## for a new sync request from the `CL`. 
+ ## + ctx.hdrCache.clear() + + ctx.pool.failedPeers.clear() + ctx.pool.seenData = false + + ctx.hibernate = true + + metrics.set(nec_sync_last_block_imported, 0) + metrics.set(nec_sync_head, 0) + + info "Suspending syncer", base=ctx.chain.baseNumber.bnStr, + head=ctx.chain.latestNumber.bnStr, nSyncPeers=ctx.pool.nBuddies + # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/worker/worker_desc.nim b/execution_chain/sync/beacon/worker/worker_desc.nim index 5f7e7dbb29..5acb9af903 100644 --- a/execution_chain/sync/beacon/worker/worker_desc.nim +++ b/execution_chain/sync/beacon/worker/worker_desc.nim @@ -58,6 +58,64 @@ type # ------------------- + ActivateSyncerHdl* = + proc(ctx: BeaconCtxRef) {.gcsafe, raises: [].} + ## Syncer activation function run when notified by header chain cache. + + SuspendSyncerHdl* = proc(ctx: BeaconCtxRef) {.gcsafe, raises: [].} + ## Syncer hibernate function run when the current session fas finished. 
+ + SchedDaemonHdl* = + proc(ctx: BeaconCtxRef): Future[Duration] {.async: (raises: []).} + ## See `runDaemon()` described in `sync_sched.nim` + + SchedStartHdl* = + proc(buddy: BeaconBuddyRef): bool {.gcsafe, raises: [].} + ## See `runStart()` described in `sync_sched.nim` + + SchedStopHdl* = + proc(buddy: BeaconBuddyRef) {.gcsafe, raises: [].} + ## See `runStart()` described in `sync_sched.nim` + + SchedPoolHdl* = + proc(buddy: BeaconBuddyRef; last: bool; laps: int): + bool {.gcsafe, raises: [].} + ## See `runPool()` described in `sync_sched.nim` + + SchedPeerHdl* = + proc(buddy: BeaconBuddyRef): Future[Duration] {.async: (raises: []).} + ## See `runPeer()` described in `sync_sched.nim` + + GetBlockHeadersHdl* = + proc(buddy: BeaconBuddyRef; req: BlockHeadersRequest): + Future[Result[FetchHeadersData,BeaconError]] {.async: (raises: []).} + ## From the ethXX argument peer implied by `buddy` fetch a list of + ## headers. + + SyncBlockHeadersHdl* = + proc(buddy: BeaconBuddyRef) {.gcsafe, raises: [].} + ## Status of syncer after `GetBlockHeadersHdl` + + GetBlockBodiesHdl* = + proc(buddy: BeaconBuddyRef; request: BlockBodiesRequest): + Future[Result[FetchBodiesData,BeaconError]] {.async: (raises: []).} + ## Fetch bodies from the network. + + SyncBlockBodiesHdl* = + proc(buddy: BeaconBuddyRef) {.gcsafe, raises: [].} + ## Status of syncer after `GetBlockBodiesHdl` + + ImportBlockHdl* = + proc(buddy: BeaconBuddyRef; blk: EthBlock; effPeerID: Hash): + Future[Result[Duration,BeaconError]] {.async: (raises: []).} + ## Import a sinmgle block into `FC` module. 
+ + SyncImportBlockHdl* = + proc(buddy: BeaconBuddyRef) {.gcsafe, raises: [].} + ## Status of syncer after `ImportBlockHdl` + + # ------------------- + BnRangeSet* = IntervalSetRef[BlockNumber,uint64] ## Disjunct sets of block number intervals @@ -127,6 +185,24 @@ type hash: Hash32 ## Some block hash to sync towards to isFinal: bool ## The `hash` belongs to a finalised block + BeaconHandlersRef* = ref object of RootRef + ## Selected handlers that can be replaced for tracing. The version number + ## allows to identify overlays. + version*: int ## Overlay version unless 0 (i.e. base=0) + activate*: ActivateSyncerHdl ## Allows for redirect (e.g. tracing) + suspend*: SuspendSyncerHdl ## Ditto + schedDaemon*: SchedDaemonHdl ## ... + schedStart*: SchedStartHdl + schedStop*: SchedStopHdl + schedPool*: SchedPoolHdl + schedPeer*: SchedPeerHdl + getBlockHeaders*: GetBlockHeadersHdl + syncBlockHeaders*: SyncBlockHeadersHdl + getBlockBodies*: GetBlockBodiesHdl + syncBlockBodies*: SyncBlockBodiesHdl + importBlock*: ImportBlockHdl + syncImportBlock*: SyncImportBlockHdl + BeaconCtxData* = object ## Globally shared data extension nBuddies*: int ## Number of active workers @@ -139,6 +215,7 @@ type chain*: ForkedChainRef ## Core database, FCU support hdrCache*: HeaderChainRef ## Currently in tandem with `chain` + handlers*: BeaconHandlersRef ## Allows for redirect (e.g. 
tracing) # Info, debugging, and error handling stuff nProcError*: Table[Hash,BuddyError] ## Per peer processing error @@ -176,6 +253,10 @@ func hdrCache*(ctx: BeaconCtxRef): HeaderChainRef = ## Shortcut ctx.pool.hdrCache +func handler*(ctx: BeaconCtxRef): BeaconHandlersRef = + ## Shortcut + ctx.pool.handlers + # ----- func hibernate*(ctx: BeaconCtxRef): bool = diff --git a/tools/syncer/.gitignore b/tools/syncer/.gitignore new file mode 100644 index 0000000000..2d19fc766d --- /dev/null +++ b/tools/syncer/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/tools/syncer/README.md b/tools/syncer/README.md new file mode 100644 index 0000000000..13b7952ccc --- /dev/null +++ b/tools/syncer/README.md @@ -0,0 +1,312 @@ +Setting up a routing tunnel from a hidden Rfc 1918 network +========================================================== + +Routing bidirectional traffic selectively through a dedicated tunnel to a +public server (e.g. a non-expensive rented *cloud* system) can emulate many +properties of a publicly exposed system. This can be used for a test system +hidden behind a digital subscriber-line or dial-up network so it can emulate +many properties of a publicly exposed system. + +The systems involved do typically share other services besides the ones +installed for tunnelling. Some care must be taken on the local system when +allowing incoming data connections. This can be mitigated by using network +filters (e.g. `nftables` on *Linux*) for accepting incoming connections only +on sub-systems, e.g. `qemu` or `docker` virtual hosts. + +General layout +-------------- + +The following scenario was set up for testing the syncer + + EXTERN LOCAL SUB1 + +-------+ +-------+ +------+ + | | tunnel | | | | + | o---l----------r---o o---s---+---a---o | + | o | | | | | | + +---|---+ +---|---+ | +------+ + x y | + | | | SUB2 + ----//----+----//------------//--+ | +------+ + internet internet MASQUERADE | | | + +---b---o | + | | | + | +------+ + : ... 
+ +where *EXTERN* is a system with a public IP address (on interface **x**), +fully exposed to the internet (e.g. a rented *cloud* server). *LOCAL* is a +system on a local network (typically with Rfc 1918 or Rfc 5737 addresses) +and has access to the internet via *SNAT* or *MASQUERADE* address translation +techniques on interface **y**. + +The system *LOCAL* accesses services on system *EXTERN* via the internet +connection. An *EXTERN* -- *LOCAL* logical connection facilitated by +interfaces **X** and **Y** allows for setting up a virtual peer-to-peer +tunnel for general IP (UDP and TCP needed) between both systems. This tunnel +is depicted above the dedicated *EXTERN* -- *LOCAL* connection with +interfaces **l** and **r**. + +The system *LOCAL* provides routing services to the internet for systems +*SUB1*, *SUB2*, etc. via interface **s** on *LOCAL*. Technically, these +sub-systems might run on a virtual system within the *LOCAL* system. + + +Example interface and network addresses +--------------------------------------- + +These addresses as used the below configuration scripts are listed here. + +| interface | IP address | netmask | gateway | additional info +|-----------| ----------------:|:--------|:--------------|----------------- +| **a** | 192.168.122.22 | /24 | 192.168.122.1 | +| **b** | 192.168.122.23 | /24 | 192.168.122.1 | +| **l** | 10.3.4.1 | /32 | n/a | point-to-point +| **r** | 10.3.4.2 | /32 | n/a | point-to-point +| **s** | 192.168.122.1 | /24 | | +| **x** | | | | public address +| **y** | | | 172.17.33.1 | dynamic, DHCP + + +Why not using *ssh* or any other TCP tunnel software +---------------------------------------------------- + +With *ssh*, one can logically pre-allocate a list of TCP connections between +two systems. This sets up listeners on the one end of the tunnel and comes out +on the other end when an application is connecting to a listener. It is most +easily set up and provides reliable, encrypted connections. 
But this does not +the tunnel wanted here. + +In another *ssh* mode, one can build a connection and tie it to a *pty* or +a *tun* device. In the case of a *pty*, one can install a *ppp* connection +on top of that. In either case, one ends up with a pair of network interfaces +that could be used for implementing the **r**--**l** tunnel for the above +scenario. + +Unfortunately, that scenario works only well in some rare cases (probably on +a *LAN*) for TCP over *ssh*, the reason being that TCP traffic control will +adjust simultaneously: the outer *ssh* TCP connection and the inner TCP data +connection (see details on +[PPP over *ssh*](https://web.archive.org/web/20220103191127/http://sites.inka.de/bigred/devel/tcp-tcp.html) +or [TCP over TCP](https://lsnl.jp/~ohsaki/papers/Honda05_ITCom.pdf).) + + +Suitable **r**--**l** tunnel software solutions +----------------------------------------------- + +The software package used here is `quicktun` which runs a single UDP based +peer-to-peer tunnel and provides several flavours of encryption. + +Other solutions would be `openVPN` which provides multiple topologies with +pluggable authentication and encryption, or `vtun` which provides a server +centric star topology (with optional encryption considered weak.) + + +Setting up the **r**--**l** tunnel on Debian bookworm +----------------------------------------------------- + +A detailed description on the `quicktun` software is available at +[QuickTun](http://wiki.ucis.nl/QuickTun). + +All command line commands displayed here must be run with administrator +privileges, i.e. as user **root**. + +Install tunnel software on *LOCAl* and *EXTERN* via + + apt install quicktun + +Generate and remember two key pairs using `keypair` twice. This gives keys + + SECRET: + PUBLIC: + + SECRET: + PUBLIC: + +On *LOCAL* set it up as client. Install the file + + /etc/network/interfaces.d/client-tun + +with contents + + # Do not use the automatic directive "auto tun0" here. 
This would take + # up this tunnel interface too early. Rather use `ifup tun0` in + # "/etc/rc.local". On Debian unless done so, this start up file can + # be enabled via + # chmod +x /etc/rc.local + # systemctl enable rc-local.service + # + iface tun0 inet static + # See http://wiki.ucis.nl/QuickTun for details. Contrary to the + # examples there, comments must not follow the directives on the + # same line to the right. + address 10.3.4.2 + pointopoint 10.3.4.1 + netmask 255.255.255.255 + qt_local_address 0.0.0.0 + + # Explicit port number (default 2998) + qt_remote_port 2992 + qt_remote_address + + # Available protocols: raw, nacl0, nacltai, salty + qt_protocol nacl0 + qt_tun_mode 1 + + # This is the private tunnel key which should be accessible by + # root only. Public access to this config file should be resticted + # to root only, e.g. via + # chmod go-rw + qt_private_key + + # Server public key + qt_public_key + + # Make certain that tunnel packets can be sent via outbound + # interface. + up route add -host gw 172.17.33.1 || true + down route del -host gw 172.17.33.1 || true + + # Route virtual network data into the tunnel. To achieve this, two + # routing tables are used: "main" and a local one "8". The "main" + # table is the standard table, the local one "8" is used to route + # a set of traffic into the tunnel interface. Routing tables "main" + # or "8" are selected by the policy set up via + # "ip rules add ... lookup " + up ip rule add from 192.168.122.1 lookup main || true + up ip rule add from 192.168.122.0/24 lookup 8 || true + up ip rule add from 10.3.4.2 lookup 8 || true + up ip route add default via 10.3.4.1 table 8 || true + up ip route add 192.168.122.0/24 via 192.168.122.1 table 8 || true + + down ip rule del from 192.168.122.1 lookup main || true + down ip rule del from 192.168.122.0/24 || true + down ip rule del from 10.3.4.2 lookup 8 || true + down ip route flush table 8 || true + + # End + +and on *EXTERN* set it up as server. 
Install the file + + /etc/network/interfaces.d/server-tun + +with contents + + iface tun0 inet static + address 10.3.4.1 + pointopoint 10.3.4.2 + netmask 255.255.255.255 + qt_remote_address 0.0.0.0 + + qt_local_port 2992 + qt_local_address + + qt_protocol nacl0 + qt_tun_mode 1 + + # Do not forget to run `chmod go-rw + qt_private_key + qt_public_key + + # Route into hidden sub-network which will be exposed after NAT. + up route add -net 192.168.122.0 netmask 255.255.255.0 gw 10.3.4.1 + down route del -net 192.168.122.0 netmask 255.255.255.0 gw 10.3.4.1 + +On either system *EXTERN* and *LOCAL* make certain that the file + + /etc/network/interfaces + +contains a line + + source /etc/network/interfaces.d/* + +Then the tunnel can be established by running + + ifup tun0 + +on either system. In order to verify, try running + + ping 10.3.4.2 # on EXTERN + ping 10.3.4.1 # on LOCAL + + +Configuring `iptables` on the *EXTERN* server +--------------------------------------------- + +As a suggestion for an `nftables` filter and NAT rules set on a *Linux* host +*EXTERN* would be + + #! 
/usr/sbin/nft -f + + define wan_if = + define wan_ip = + define tun_if = tun0 + + define gw_ip = 10.3.4.2 + define gw_ports = { 30600-30699, 9010-9019 } + define h1_ip = 192.168.122.22 + define h1_ports = 30700-30799 + define h2_ip = 192.168.122.23 + define h2_ports = 9000-9009 + + table ip filter { + # Accept all input and output + chain INPUT { type filter hook input priority filter; policy accept; } + chain OUTPUT { type filter hook output priority filter; policy accept; } + + # Selective tunnel transit and NAT debris + chain FORWARD { + type filter hook forward priority filter; policy drop; + ct state related,established counter accept + iif $tun_if ct state new counter accept + iif $tun_if counter accept + iif $wan_if ct state new counter accept + iif $wan_if counter accept + counter log prefix "Tunnel Drop " level info + counter drop + } + } + table ip nat { + chain INPUT { type nat hook input priority 100; policy accept; } + chain OUTPUT { type nat hook output priority -100; policy accept; } + + # Map new connection destination address depending on dest. port + chain PREROUTING { + type nat hook prerouting priority dstnat; policy accept; + ip daddr $wan_ip tcp dport $h1_ports counter dnat to $h1_ip + ip daddr $wan_ip udp dport $h1_ports counter dnat to $h1_ip + ip daddr $wan_ip tcp dport $h2_ports counter dnat to $h2_ip + ip daddr $wan_ip udp dport $h2_ports counter dnat to $h2_ip + ip daddr $wan_ip tcp dport $gw_ports counter dnat to $gw_ip + ip daddr $wan_ip udp dport $gw_ports counter dnat to $gw_ip + } + # Map new connection source address to wan address + chain POSTROUTING { + type nat hook postrouting priority srcnat; policy accept; + oif $wan_if ip daddr $wan_ip counter return + oif $wan_if ip saddr $gw_ip counter snat to $wan_ip + oif $wan_if ip saddr $h1_ip counter snat to $wan_ip + oif $wan_if ip saddr $h2_ip counter snat to $wan_ip + } + } + + +Running Nimbus EL or CL on *LOCAL* client and/or *SUB1*. 
*SUB2* +--------------------------------------------------------------- + +When starting `nimbus_execution_client` on *SUB1*, *SUB2*, etc. systems, +one needs to set options + + --engine-api-address=0.0.0.0 + --nat=extip: + +and for the `nimbus_beacon_node` on *SUB1*, *SUB2*, etc. use + + --nat=extip: + +For running both, `nimbus_execution_client` and `nimbus_beacon_node` +on *LOCAL* directly, one needs to set options + + --listen-address=10.3.4.2 + --nat=extip: + +on either system. diff --git a/tools/syncer/helpers/sync_ticker.nim b/tools/syncer/helpers/sync_ticker.nim new file mode 100644 index 0000000000..4bace135f9 --- /dev/null +++ b/tools/syncer/helpers/sync_ticker.nim @@ -0,0 +1,181 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +{.push raises: [].} + +import + std/strutils, + pkg/[chronos, chronicles, eth/common, stint, stew/interval_set], + ../../../execution_chain/sync/beacon/worker/[blocks, headers, worker_desc] + +logScope: + topics = "beacon ticker" + +type + TickerStats = object + ## Full sync state (see `TickerFullStatsUpdater`) + base: BlockNumber + latest: BlockNumber + coupler: BlockNumber + dangling: BlockNumber + top: BlockNumber + head: BlockNumber + target: BlockNumber + activeOk: bool + + hdrUnprocTop: BlockNumber + nHdrUnprocessed: uint64 + nHdrUnprocFragm: int + nHdrStaged: int + hdrStagedTop: BlockNumber + + blkUnprocBottom: BlockNumber + nBlkUnprocessed: uint64 + nBlkUnprocFragm: int + nBlkStaged: int + blkStagedBottom: BlockNumber + + state: SyncState + nBuddies: int + + TickerRef* = ref object of RootRef + ## Ticker descriptor object + started: Moment + visited: Moment + lastStats: TickerStats + +# ------------------------------------------------------------------------------ +# Private functions: printing ticker messages +# ------------------------------------------------------------------------------ + +const + tickerLogInterval = chronos.seconds(2) + tickerLogSuppressMax = chronos.seconds(100) + +proc updater(ctx: BeaconCtxRef): TickerStats = + ## Legacy stuff, will be probably be superseded by `metrics` + TickerStats( + base: ctx.chain.baseNumber, + latest: ctx.chain.latestNumber, + coupler: ctx.headersUnprocTotalBottom(), + dangling: ctx.hdrCache.antecedent.number, + top: ctx.subState.top, + head: ctx.subState.head, + target: ctx.hdrCache.latestConsHeadNumber, + activeOk: ctx.pool.lastState != idle, + + nHdrStaged: ctx.headersStagedQueueLen(), + hdrStagedTop: ctx.headersStagedQueueTopKey(), + hdrUnprocTop: ctx.headersUnprocTotalTop(), + nHdrUnprocessed: ctx.headersUnprocTotal(), + nHdrUnprocFragm: ctx.hdr.unprocessed.chunks, + + nBlkStaged: ctx.blocksStagedQueueLen(), + blkStagedBottom: ctx.blocksStagedQueueBottomKey(), + blkUnprocBottom: 
ctx.blocksUnprocTotalBottom(), + nBlkUnprocessed: ctx.blocksUnprocTotal(), + nBlkUnprocFragm: ctx.blk.unprocessed.chunks, + + state: ctx.pool.lastState, + nBuddies: ctx.pool.nBuddies) + +proc tickerLogger(t: TickerRef; ctx: BeaconCtxRef) = + let + data = ctx.updater() + now = Moment.now() + + if now <= t.visited + tickerLogInterval: + return + + if data != t.lastStats or + tickerLogSuppressMax < (now - t.visited): + let + B = if data.base == data.latest: "L" else: data.base.bnStr + L = if data.latest == data.coupler: "C" else: data.latest.bnStr + I = if data.top == 0: "n/a" else : data.top.bnStr + C = if data.coupler == data.dangling: "D" + elif data.coupler < high(int64).uint64: data.coupler.bnStr + else: "n/a" + D = if data.dangling == data.head: "H" else: data.dangling.bnStr + H = if data.head == data.target: "T" + elif data.activeOk: data.head.bnStr + else: "?" & $data.head + T = if data.activeOk: data.target.bnStr else: "?" & $data.target + + hS = if data.nHdrStaged == 0: "n/a" + else: data.hdrStagedTop.bnStr & "[" & $data.nHdrStaged & "]" + hU = if data.nHdrUnprocFragm == 0 and data.nHdrUnprocessed == 0: "n/a" + elif data.hdrUnprocTop == 0: + "(" & data.nHdrUnprocessed.toSI & "," & + $data.nHdrUnprocFragm & ")" + else: data.hdrUnprocTop.bnStr & "(" & + data.nHdrUnprocessed.toSI & "," & $data.nHdrUnprocFragm & ")" + hQ = if hS == "n/a": hU + elif hU == "n/a": hS + else: hS & "<-" & hU + + bS = if data.nBlkStaged == 0: "n/a" + else: data.blkStagedBottom.bnStr & "[" & $data.nBlkStaged & "]" + bU = if data.nBlkUnprocFragm == 0 and data.nBlkUnprocessed == 0: "n/a" + elif data.blkUnprocBottom == high(BlockNumber): + "(" & data.nBlkUnprocessed.toSI & "," & + $data.nBlkUnprocFragm & ")" + else: data.blkUnprocBottom.bnStr & "(" & + data.nBlkUnprocessed.toSI & "," & $data.nBlkUnprocFragm & ")" + bQ = if bS == "n/a": bU + elif bU == "n/a": bS + else: bS & "<-" & bU + + st = case data.state + of idle: "0" + of headers: "h" + of headersCancel: "x" + of headersFinish: "f" 
+ of blocks: "b" + of blocksCancel: "x" + of blocksFinish: "f" + + nP = data.nBuddies + + # With `int64`, there are more than 29*10^10 years range for seconds + up = (now - t.started).seconds.uint64.toSI + mem = getTotalMem().uint.toSI + + t.lastStats = data + t.visited = now + + case data.state + of idle: + debug "Sync state idle", up, nP, B, L, + D, H, T, hQ, bQ, + mem + + of headers, headersCancel, headersFinish: + debug "Sync state headers", up, nP, st, B, L, + C, D, H, T, hQ, + mem + + of blocks, blocksCancel, blocksFinish: + debug "Sync state blocks", up, nP, st, B, L, + D, I, H, T, bQ, + mem + +# ------------------------------------------------------------------------------ +# Public function +# ------------------------------------------------------------------------------ + +proc syncTicker*(): BackgroundTicker = + let desc = TickerRef(started: Moment.now()) + return proc(ctx: BeaconCtxRef) = + desc.tickerLogger(ctx) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/README.md b/tools/syncer/replay/README.md new file mode 100644 index 0000000000..97ca6c3d31 --- /dev/null +++ b/tools/syncer/replay/README.md @@ -0,0 +1,48 @@ +Inspection of Capture Data And Replay +===================================== + +Inspection +---------- + +Given a (probably *gzipped*) capture file **(capture)** as a result of +tracing, its content can be visualised as a space separated list of +selected text fields via + + ./build/syncer_test_client_inspect --capture-file=(capture) + +As the **(capture)** is a list of *JSON* text lines, the *gunzipped* version +can also can be inspected with a text editor (or perusal pager *more* or +*less*.). + +Replay +------ + +Copy the current database directory **(database)** and its recursive content +as **(dbcopy)**, say. 
Then start a capture session on the original database +via + + ./build/syncer_test_client_trace \ + --datadir=(database) ... -- --capture-file=(capture) + +where **...** stands for all other options that might be useful for running +an execution layer session and **(capture)** will collect all the data needed +for replay. This file can become quite huge. It should be *gzipped* after the +capture run has finished and the *gzipped* version used, instead. + +Monitor the capture run so it can be stopped at an appropriate state using +metrics or logs. With the above command line arguments, only a single sync +session is logged ranging from the first activation message (when *"Activating +syncer"* is logged) up until the suspend message (when *"Suspending syncer"* +is logged.) + +Now, the captured session can be replayed on the secured database copy +**(dbcopy)** with the (probably *gzipped*) **(capture)** file via + + ./build/syncer_test_client_replay \ + --datadir=(dbcopy) ... -- --capture-file=(capture) + +where the additional arguments **...** of either command above need not be +the same. + +Note that you need another copy of the original **(database)** if you need to +re-exec the latter command line statement for another replay. diff --git a/tools/syncer/replay/replay_desc.nim b/tools/syncer/replay/replay_desc.nim new file mode 100644 index 0000000000..732fdd8b69 --- /dev/null +++ b/tools/syncer/replay/replay_desc.nim @@ -0,0 +1,142 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Replay environment + +{.push raises:[].} + +import + std/streams, + ../trace/trace_desc, + ./replay_reader/reader_desc + +export + reader_desc, + trace_desc + +const + ReplaySetupID* = 2 ## Phase 1 layout ID, prepare + ReplayRunnerID* = 20 ## Phase 2 layout ID, full execution + + ReplayTypeLabel* = block: + var a: array[TraceRecType,string] + a[TraceRecType(0)] = "=Oops" + a[VersionInfo] = "=Version" + a[SyncActvFailed] = "=ActvFailed" + a[SyncActivated] = "=Activated" + a[SyncHibernated] = "=Suspended" + a[SchedStart] = "=StartPeer" + a[SchedStop] = "=StopPeer" + a[SchedPool] = "=Pool" + a[SchedDaemonBegin] = "+Daemon" + a[SchedDaemonEnd] = "-Daemon" + a[SchedPeerBegin] = "+Peer" + a[SchedPeerEnd] = "-Peer" + a[FetchHeaders] = "=HeadersFetch" + a[SyncHeaders] = "=HeadersSync" + a[FetchBodies] = "=BodiesFetch" + a[SyncBodies] = "=BodiesSync" + a[ImportBlock] = "=BlockImport" + a[SyncBlock] = "=BlockSync" + for w in a: + doAssert 0 < w.len + a + +type + ReplayStopIfFn* = proc(): bool {.gcsafe, raises: [].} + ## Loop control directive for runner/dispatcher + + ReplayEndUpFn* = proc() {.gcsafe, raises: [].} + ## Terminator control directive for runner/dispatcher + + ReplayRef* = ref object of BeaconHandlersSyncRef + ## Overlay handlers extended by descriptor data for caching replay state + ctx*: BeaconCtxRef ## Parent context + captStrm*: Stream ## Input stream, capture file + fakeImport*: bool ## No database import if `true` + stopQuit*: bool ## Quit after replay + backup*: BeaconHandlersRef ## Can restore previous handlers + reader*: ReplayReaderRef ## Input records + + + ReplayPayloadRef* = ref object of RootRef + ## Decoded payload base record + recType*: TraceRecType + + ReplayVersionInfo* = ref object of ReplayPayloadRef + bag*: TraceVersionInfo + + # ------------- + + ReplaySyncActvFailed* = ref object of ReplayPayloadRef + bag*: TraceSyncActvFailed + + ReplaySyncActivated* = ref object of ReplayPayloadRef + bag*: TraceSyncActivated + + 
ReplaySyncHibernated* = ref object of ReplayPayloadRef + bag*: TraceSyncHibernated + + # ------------- + + ReplaySchedDaemonBegin* = ref object of ReplayPayloadRef + bag*: TraceSchedDaemonBegin + + ReplaySchedDaemonEnd* = ref object of ReplayPayloadRef + bag*: TraceSchedDaemonEnd + + ReplaySchedStart* = ref object of ReplayPayloadRef + bag*: TraceSchedStart + + ReplaySchedStop* = ref object of ReplayPayloadRef + bag*: TraceSchedStop + + ReplaySchedPool* = ref object of ReplayPayloadRef + bag*: TraceSchedPool + + ReplaySchedPeerBegin* = ref object of ReplayPayloadRef + bag*: TraceSchedPeerBegin + + ReplaySchedPeerEnd* = ref object of ReplayPayloadRef + bag*: TraceSchedPeerEnd + + # ------------- + + ReplayFetchHeaders* = ref object of ReplayPayloadRef + bag*: TraceFetchHeaders + + ReplaySyncHeaders* = ref object of ReplayPayloadRef + bag*: TraceSyncHeaders + + + ReplayFetchBodies* = ref object of ReplayPayloadRef + bag*: TraceFetchBodies + + ReplaySyncBodies* = ref object of ReplayPayloadRef + bag*: TraceSyncBodies + + + ReplayImportBlock* = ref object of ReplayPayloadRef + bag*: TraceImportBlock + + ReplaySyncBlock* = ref object of ReplayPayloadRef + bag*: TraceSyncBlock + +# ------------------------------------------------------------------------------ +# Public helpers +# ------------------------------------------------------------------------------ + +template replayLabel*(w: untyped): string = + ## Static getter, retrieve replay type label + ReplayTypeLabel[(typeof w.bag).toTraceRecType] + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_reader.nim b/tools/syncer/replay/replay_reader.nim new file mode 100644 index 0000000000..b5b7f2354e --- /dev/null +++ b/tools/syncer/replay/replay_reader.nim @@ -0,0 +1,76 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and 
distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay environment + +{.push raises:[].} + +import + std/[net, syncio], + ./replay_reader/[reader_init, reader_unpack, reader_reclog], + ./replay_desc + +export + ReplayReaderRef, + reader_init + +type + StopFn* = proc(): bool {.gcsafe, raises: [].} + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc nextRecord*(rp: ReplayReaderRef): ReplayPayloadRef = + ## Retrieve the next record from the capture + while true: + var line = rp.readLine(rp).valueOr: + return ReplayPayloadRef(nil) + if 0 < line.len and line[0] != '#': + return line.unpack() + +proc captureLog*( + rp: ReplayReaderRef; + prt: ReplayRecLogPrintFn; + stop: StopFn; + ) = + ## Cycle through capture records from `rp` and feed them to the + ## argument `prt()`. + var n = 0 + while not stop(): + let w = rp.nextRecord() + if w.isNil and rp.atEnd(rp): + break + n.inc + prt w.recLogToStrList(n) + prt n.recLogToStrEnd() + +proc captureLog*( + rp: ReplayReaderRef; + stop: StopFn; + ) = + ## Pretty print linewise records from the capture `rp`. 
+ rp.captureLog(stdout.recLogPrint(), stop) + +# ------------------------------------------------------------------------------ +# Public iterators +# ------------------------------------------------------------------------------ + +iterator records*(rp: ReplayReaderRef): ReplayPayloadRef = + ## Iterate over all capture records + while true: + let record = rp.nextRecord() + if record.isNil and rp.atEnd(rp): + break + yield record + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_reader/reader_desc.nim b/tools/syncer/replay/replay_reader/reader_desc.nim new file mode 100644 index 0000000000..368a7c4192 --- /dev/null +++ b/tools/syncer/replay/replay_reader/reader_desc.nim @@ -0,0 +1,40 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay environment + +{.push raises:[].} + +import + std/streams, + pkg/[results, zlib] + +type + ReplayRecLogPrintFn* = proc(s: seq[string]) {.gcsafe, raises: [].} + ## Print output (e.g. used in `lineLog()`) for logger + + ReplayReadLineFn* = + proc(rp: ReplayReaderRef): Opt[string] {.gcsafe, raises: [].} + ## Reader filter, e.g. 
for zipped data + + ReplayAtEndFn* = + proc(rp: ReplayReaderRef): bool {.gcsafe, raises: [].} + ## Indicates end of stream + + ReplayReaderRef* = ref object + ## Reader descriptor + inStream*: Stream ## Dump file for ethxx data packets + gzFilter*: GUnzipRef ## Apply GUnzip filter to stream + readLine*: ReplayReadLineFn ## Reader function + atEnd*: ReplayAtEndFn ## EOF indicator + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_reader/reader_helpers.nim b/tools/syncer/replay/replay_reader/reader_helpers.nim new file mode 100644 index 0000000000..66aede05d4 --- /dev/null +++ b/tools/syncer/replay/replay_reader/reader_helpers.nim @@ -0,0 +1,124 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Replay helpers + +{.push raises:[].} + +import + std/strutils, + pkg/[chronos, eth/common], + ../../trace/trace_setup/setup_helpers as trace_helpers, + ../../../../execution_chain/sync/beacon/worker/helpers as worker_helpers, + ../replay_desc + +export + trace_helpers.idStr, + trace_helpers.short, + worker_helpers + +# ------------------------------------------------------------------------------ +# Public helpers +# ------------------------------------------------------------------------------ + +func ageStr*(w: chronos.Duration): string = + var + res = newStringOfCap(32) + nsLeft = w.nanoseconds() + + # Inspired by `chronos/timer.toString()` + template f( + pfxChr: static[char]; + pfxLen: static[int]; + ela: static[chronos.Duration]; + sep: static[string]; + ) = + let n = uint64(nsLeft div ela.nanoseconds()) + when pfxLen == 0: + let s = if 0 < n: $n else: "" + else: + let s = $n + when 0 < pfxLen: + res.add pfxChr.repeat(max(0, pfxLen - s.len)) + res.add s + when pfxLen == 0: + if 0 < n: res.add sep + else: + res.add sep + nsLeft = nsLeft mod ela.nanoseconds() + + f(' ', 0, chronos.Day, "d ") + f('0', 2, chronos.Hour, ":") + f('0', 2, chronos.Minute, ":") + f('0', 2, chronos.Second, ".") + f('0', 3, chronos.Millisecond, ".") + f('0', 3, chronos.Microsecond, "") + + res + +func toUpperFirst*(w: string): string = + if 1 < w.len: + $w[0].toUpperAscii & w.substr(1) + else: + w + +# ---------------- + +template withReplayTypeExpr*(recType: TraceRecType): untyped = + ## Big switch for allocating `TraceRecType` type dependent replay code + ## using the replay record layouts. 
+ ## + mixin replayTypeExpr + + case recType: + of TraceRecType(0): + replayTypeExpr(TraceRecType(0), ReplayPayloadRef) + of VersionInfo: + replayTypeExpr(VersionInfo, ReplayVersionInfo) + + of SyncActvFailed: + replayTypeExpr(SyncActvFailed,ReplaySyncActvFailed) + of SyncActivated: + replayTypeExpr(SyncActivated, ReplaySyncActivated) + of SyncHibernated: + replayTypeExpr(SyncHibernated, ReplaySyncHibernated) + + of SchedDaemonBegin: + replayTypeExpr(SchedDaemonBegin, ReplaySchedDaemonBegin) + of SchedDaemonEnd: + replayTypeExpr(SchedDaemonEnd, ReplaySchedDaemonEnd) + of SchedStart: + replayTypeExpr(SchedStart, ReplaySchedStart) + of SchedStop: + replayTypeExpr(SchedStop, ReplaySchedStop) + of SchedPool: + replayTypeExpr(SchedPool, ReplaySchedPool) + of SchedPeerBegin: + replayTypeExpr(SchedPeerBegin, ReplaySchedPeerBegin) + of SchedPeerEnd: + replayTypeExpr(SchedPeerEnd, ReplaySchedPeerEnd) + + of FetchHeaders: + replayTypeExpr(FetchHeaders, ReplayFetchHeaders) + of SyncHeaders: + replayTypeExpr(SyncHeaders, ReplaySyncHeaders) + + of FetchBodies: + replayTypeExpr(FetchBodies, ReplayFetchBodies) + of SyncBodies: + replayTypeExpr(SyncBodies, ReplaySyncBodies) + of ImportBlock: + replayTypeExpr(ImportBlock, ReplayImportBlock) + of SyncBlock: + replayTypeExpr(SyncBlock, ReplaySyncBlock) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_reader/reader_init.nim b/tools/syncer/replay/replay_reader/reader_init.nim new file mode 100644 index 0000000000..9d2299843a --- /dev/null +++ b/tools/syncer/replay/replay_reader/reader_init.nim @@ -0,0 +1,148 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). 
+# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay environment + +{.push raises:[].} + +import + std/[endians, os, streams, strutils], + pkg/[chronicles, eth/common, zlib], + ../replay_desc + +logScope: + topics = "replay reader" + +type + FileSignature = enum + Unknown = 0 + Plain + Gzip + +const + DontQuit = low(int) + ## To be used with `onCloseException()` + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +template onException( + info: static[string]; + quitCode: static[int]; + code: untyped) = + try: + code + except CatchableError as e: + const blurb = info & "Replay stream exception" + when quitCode == DontQuit: + error blurb, error=($e.name), msg=e.msg + else: + fatal blurb & " -- STOP", error=($e.name), msg=e.msg + quit(quitCode) + +proc getFileSignature(strm: Stream): (FileSignature,uint16) = + const info = "getSignature(): " + var u16: uint16 + info.onException(QuitFailure): + let v16 = strm.peekUint16() + (addr u16).bigEndian16(addr v16) + + # Gzip signature + if u16 == 0x1f8b'u16: + return (Gzip,u16) + + # Ascii signature: /{"/ or /#[0-9a-zA-Z ]/ + let (c0, c1) = (char(u16 shr 8), char(u16.uint8)) + if c0 == '{' and c1 == '"': + return (Plain,u16) + if c0 == '#' and (c1.isAlphaNumeric or c1.isSpaceAscii): + return (Plain,u16) + + (Unknown,u16) + +# ------------------------------------------------------------------------------ +# Private record reader functions +# ------------------------------------------------------------------------------ + +proc plainReadLine(rp: ReplayReaderRef): Opt[string] = + const info = "plainReadLine(): " + info.onException(DontQuit): + if not rp.inStream.atEnd(): + return ok(rp.inStream.readLine) + err() + 
+proc plainAtEnd(rp: ReplayReaderRef): bool = + const info = "plainAtEnd(): " + info.onException(DontQuit): + return rp.inStream.atEnd() + true + +proc gUnzipReadLine(rp: ReplayReaderRef): Opt[string] = + const info = "gUnzipReadLine(): " + var rc = Result[string,GUnzipStatus].err((Z_STREAM_ERROR,"")) + info.onException(DontQuit): + rc = rp.gzFilter.nextLine() + if rc.isErr(): + if not rp.gzFilter.lineStatusOk(): + let err = rp.gzFilter.lineStatus() + info info & "GUnzip filter error", zError=err.zError, info=err.info + return err() + ok(rc.value) + +proc gUnzipAtEnd(rp: ReplayReaderRef): bool = + rp.gzFilter.atEnd() + +# ------------------------------------------------------------------------------ +# Public constructor(s) +# ------------------------------------------------------------------------------ + +proc init*(T: type ReplayReaderRef; strm: Stream): T = + const info = "ReplayReaderRef.init(): " + + if strm.isNil: + fatal info & "Cannot use nil stream for reading -- STOP" + quit(QuitFailure) + + let + (sig, u16) = strm.getFileSignature() # Check file encoding + rp = T(inStream: strm) # Set up descriptor + + # Set up line reader, probably with gunzip/deflate filter + case sig: + of Plain: + rp.readLine = plainReadLine + rp.atEnd = plainAtEnd + of Gzip: + var rc = Result[GUnzipRef,GUnzipStatus].err((Z_STREAM_ERROR,"")) + info.onException(DontQuit): + rc = GUnzipRef.init(strm) + if rc.isErr: + fatal info & "Cannot assign gunzip reader -- STOP" + quit(QuitFailure) + rp.gzFilter = rc.value + rp.readLine = gUnzipReadLine + rp.atEnd = gUnzipAtEnd + of Unknown: + fatal info & "Unsupported file encoding -- STOP", + fileSignature=("0x" & $u16.toHex(4)) + quit(QuitFailure) + + rp + + +proc destroy*(rp: ReplayReaderRef) = + const info = "destroy(ReplayReaderRef): " + info.onException(DontQuit): + rp.inStream.flush() + +# ------------------------------------------------------------------------------ +# End +# 
------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_reader/reader_reclog.nim b/tools/syncer/replay/replay_reader/reader_reclog.nim new file mode 100644 index 0000000000..09009dab48 --- /dev/null +++ b/tools/syncer/replay/replay_reader/reader_reclog.nim @@ -0,0 +1,330 @@ + +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay environment + +{.push raises:[].} + +import + std/[net, strformat, strutils, syncio], + pkg/[chronicles, chronos, eth/common, eth/rlp], + ../../../../execution_chain/utils/prettify, + ../replay_desc, + ./reader_helpers + +logScope: + topics = "replay reader" + +# ------------------------------------------------------------------------------ +# Private functions +# ------------------------------------------------------------------------------ + +proc addX( + q: var seq[string]; + info: string; + lnr: int; + base: TraceRecBase; + ) = + ## Output header + q.add base.time.ageStr() + q.add info + if 0 < lnr and base.serial != lnr.uint: + q.add $base.serial & "!" 
& $lnr + else: + q.add $base.serial + + if base.frameID.isSome(): + q.add base.frameID.value.idStr + else: + q.add "*" + q.add $base.nPeers + q.add ($base.syncState).toUpperFirst() + q.add ($base.chainMode).toUpperFirst() + + q.add base.baseNum.bnStr() + q.add base.latestNum.bnStr() + + if base.chainMode in {collecting,ready,orphan}: + q.add base.antecedent.bnStr() + else: + q.add "*" + + if base.peerCtx.isSome(): + q.add $base.peerCtx.value.peerCtrl + q.add "peerID=" & base.peerCtx.value.peerID.short() + else: + q.add "*" + q.add "*" + + if base.hdrUnpr.isSome(): + q.add "uHdr=" & $base.hdrUnpr.value.hLen & "/" & + $base.hdrUnpr.value.hChunks & "/" & + $base.hdrUnpr.value.hLastLen & ":" & + $base.hdrUnpr.value.hLast.bnStr + + if base.blkUnpr.isSome(): + q.add "uBlk=" & $base.blkUnpr.value.bLen & "/" & + $base.blkUnpr.value.bChunks & "/" & + $base.blkUnpr.value.bLeast.bnStr & ":" & + $base.blkUnpr.value.bLeastLen + + if base.peerCtx.isSome() and + (0 < base.peerCtx.value.nHdrErrors or + 0 < base.peerCtx.value.nBlkErrors): + q.add "nErr=(" & $base.peerCtx.value.nHdrErrors & + "," & $base.peerCtx.value.nBlkErrors & ")" + + if base.slowPeer.isSome(): + q.add "slowPeer=" & base.slowPeer.value.short() + +# ------------------------------------------------------------------------------ +# Private record handlers +# ------------------------------------------------------------------------------ + +func toStrOops(n: int): seq[string] = + @["?", $n] + +# ----------- + +func toStrSeq(n: int; w: ReplayVersionInfo): seq[string] = + var res = newSeqOfCap[string](15) + res.addX(w.replayLabel, n, w.bag) + let moan = if w.bag.version < TraceVersionID: "(<" & $TraceVersionID & ")" + elif TraceVersionID < w.bag.version: "(>" & $TraceVersionID & ")" + else: "" + res.add "version=" & $w.bag.version & moan + res.add "network=" & $w.bag.networkId + res.add "base=" & w.bag.baseNum.bnStr + res.add "latest=" & w.bag.latestNum.bnStr + res + +# ----------- + +func toStrSeq(n: int; w: 
ReplaySyncActvFailed): seq[string] = + var res = newSeqOfCap[string](15) + res.addX(w.replayLabel, n, w.bag) + res.add "base=" & w.bag.baseNum.bnStr + res.add "latest=" & w.bag.latestNum.bnStr + res + +func toStrSeq(n: int; w: ReplaySyncActivated): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res.add "head=" & w.bag.head.bnStr + res.add "finHash=" & w.bag.finHash.short + res.add "base=" & w.bag.baseNum.bnStr + res.add "latest=" & w.bag.latestNum.bnStr + res + +func toStrSeq(n: int; w: ReplaySyncHibernated): seq[string] = + var res = newSeqOfCap[string](15) + res.addX(w.replayLabel, n, w.bag) + res.add "base=" & w.bag.baseNum.bnStr + res.add "latest=" & w.bag.latestNum.bnStr + res + +# ----------- + +func toStrSeq(n: int; w: ReplaySchedDaemonBegin): seq[string] = + var res = newSeqOfCap[string](15) + res.addX(w.replayLabel, n, w.bag) + res + +func toStrSeq(n: int; w: ReplaySchedDaemonEnd): seq[string] = + var res = newSeqOfCap[string](15) + res.addX(w.replayLabel, n, w.bag) + res + +func toStrSeq(n: int; w: ReplaySchedStart): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res.add "peer=" & $w.bag.peerIP & ":" & $w.bag.peerPort + if not w.bag.accept: + res.add "rejected" + res + +func toStrSeq(n: int; w: ReplaySchedStop): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res.add "peer=" & $w.bag.peerIP & ":" & $w.bag.peerPort + res + +func toStrSeq(n: int; w: ReplaySchedPool): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res.add "peer=" & $w.bag.peerIP & ":" & $w.bag.peerPort + res.add "last=" & $w.bag.last + res.add "laps=" & $w.bag.laps + res.add "stop=" & $w.bag.stop + res + +func toStrSeq(n: int; w: ReplaySchedPeerBegin): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res.add "peer=" & $w.bag.peerIP & ":" & $w.bag.peerPort + res + +func toStrSeq(n: int; w: 
ReplaySchedPeerEnd): seq[string] = + var res = newSeqOfCap[string](15) + res.addX(w.replayLabel, n, w.bag) + res + +# ----------- + +func toStrSeq(n: int; w: ReplayFetchHeaders): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + let + rLen = w.bag.req.maxResults + rRev = if w.bag.req.reverse: "rev" else: "" + if w.bag.req.startBlock.isHash: + res.add "req=" & + w.bag.req.startBlock.hash.short & "[" & $rLen & "]" & rRev + else: + res.add "req=" & + w.bag.req.startBlock.number.bnStr & "[" & $rLen & "]" & rRev + if 0 < w.bag.req.skip: + res.add "skip=" & $w.bag.req.skip + if w.bag.fetched.isSome(): + res.add "res=[" & $w.bag.fetched.value.packet.headers.len & "]" + res.add "ela=" & w.bag.fetched.value.elapsed.toStr + if w.bag.error.isSome(): + if w.bag.error.value.excp.ord == 0: + res.add "failed" + else: + res.add "excp=" & ($w.bag.error.value.excp).substr(1) + if w.bag.error.value.msg.len != 0: + res.add "error=" & w.bag.error.value.name & + "(" & w.bag.error.value.msg & ")" + res.add "ela=" & w.bag.error.value.elapsed.toStr + res + +func toStrSeq(n: int; w: ReplaySyncHeaders): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res + + +func toStrSeq(n: int; w: ReplayFetchBodies): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res.add "req=" & w.bag.ivReq.bnStr & "[" & $w.bag.req.blockHashes.len & "]" + if w.bag.fetched.isSome(): + res.add "res=[" & $w.bag.fetched.value.packet.bodies.len & "]" + res.add "size=" & + w.bag.fetched.value.packet.bodies.getEncodedLength.uint64.toSI + res.add "ela=" & w.bag.fetched.value.elapsed.toStr + if w.bag.error.isSome(): + if w.bag.error.value.excp.ord == 0: + res.add "failed" + else: + res.add "excp=" & ($w.bag.error.value.excp).substr(1) + if w.bag.error.value.msg.len != 0: + res.add "error=" & + w.bag.error.value.name & "(" & w.bag.error.value.msg & ")" + res.add "ela=" & w.bag.error.value.elapsed.toStr + res + 
+func toStrSeq(n: int; w: ReplaySyncBodies): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res + + +func toStrSeq(n: int; w: ReplayImportBlock): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res.add "block=" & w.bag.ethBlock.bnStr + res.add "size=" & w.bag.ethBlock.getEncodedLength.uint64.toSI + res.add "effPeerID=" & w.bag.effPeerID.short + if w.bag.elapsed.isSome(): + res.add "ela=" & w.bag.elapsed.value.toStr + if w.bag.error.isSome(): + if w.bag.error.value.excp.ord == 0: + res.add "failed" + else: + res.add "excp=" & ($w.bag.error.value.excp).substr(1) + if w.bag.error.value.msg.len != 0: + res.add "error=" & + w.bag.error.value.name & "(" & w.bag.error.value.msg & ")" + res.add "ela=" & w.bag.error.value.elapsed.toStr + res + +func toStrSeq(n: int; w: ReplaySyncBlock): seq[string] = + var res = newSeqOfCap[string](20) + res.addX(w.replayLabel, n, w.bag) + res + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc recLogPrint*(fh: File): ReplayRecLogPrintFn = + ## The function provides an example for a call back pretty printer + ## for `lineLog()`. 
+ return proc(w: seq[string]) = + try: + block doFields: + if w.len <= 9: + fh.write w.join(" ") + break doFields + + # at least 9 fields + fh.write "" & + &"{w[0]:>18} {w[1]:<13} {w[2]:>6} " & + &"{w[3]:>5} {w[4]:>2} {w[5]:<13} " & + &"{w[6]:<10} {w[7]:>10} {w[8]:>10} " & + &"{w[9]:>10}" + + if w.len <= 11: + if w.len == 11: + fh.write " " + fh.write w[10] + break doFields + + # at least 12 fields + if w.len <= 12: + fh.write &" {w[10]:<10} " + fh.write w[11] + break doFields + + # more than 12 fields + fh.write &" {w[10]:<10} {w[11]:<15}" + + # at least 13 fields + fh.write " " + fh.write w[12 ..< w.len].join(" ") + + fh.write "\n" + except IOError as e: + warn "lineLogPrint(): Exception while writing to file", + name=($e.name), msg=e.msg + +# ----------- + +func recLogToStrEnd*(n: int): seq[string] = + @[".", $n] + +proc recLogToStrList*(pyl: ReplayPayloadRef; lnr = 0): seq[string] = + ## Convert the internal capture object argument `pyl` to a list of + ## printable strings. + ## + template replayTypeExpr(t: TraceRecType, T: type): untyped = + when t == TraceRecType(0): + lnr.toStrOops() + else: + lnr.toStrSeq(pyl.T) + + pyl.recType.withReplayTypeExpr() + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_reader/reader_unpack.nim b/tools/syncer/replay/replay_reader/reader_unpack.nim new file mode 100644 index 0000000000..cc1f706862 --- /dev/null +++ b/tools/syncer/replay/replay_reader/reader_unpack.nim @@ -0,0 +1,269 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. 
This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay environment + +{.push raises:[].} + +import + std/[net, strutils], + pkg/[chronicles, chronos, eth/common, results, stew/interval_set], + pkg/json_serialization/pkg/results, + pkg/eth/common/eth_types_json_serialization, + ../replay_desc, + ./reader_helpers + +logScope: + topics = "replay reader" + +type + JsonKind = object + ## For extracting record type only (use with flavor: `SingleField`) + kind: TraceRecType + + BnPair = object + ## For parsing `BnRange` + least: BlockNumber + last: BlockNumber + +const + DontQuit = low(int) + ## To be used with `onCloseException()` + +# ------------------------------------------------------------------------------ +# Private JSON config +# ------------------------------------------------------------------------------ + +createJsonFlavor SingleField, + requireAllFields = false + +JsonKind.useDefaultSerializationIn SingleField + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +template onException( + info: static[string]; + quitCode: static[int]; + code: untyped) = + try: + code + except CatchableError as e: + const blurb = info & ": Replay stream reader exception" + when quitCode == DontQuit: + error blurb, error=($e.name), msg=e.msg + else: + fatal blurb & " -- STOP", error=($e.name), msg=e.msg + quit(quitCode) + +func fromHex(c: char): int = + case c + of '0'..'9': ord(c) - ord('0') + of 'a'..'f': ord(c) - ord('a') + 10 + of 'A'..'F': ord(c) - ord('A') + 10 + else: -1 + + +proc toIp4Address(s: string): Opt[IpAddress] = + ## Parse IPv4 dotted address string + ## + # Make sure that `nibbles.len` == 4 + let dgts = s.split('.') + if dgts.len != 4: + return err() + + var ip4 = IpAddress(family: IpAddressFamily.IPv4) + for n in 0 .. 
3: + "toIp4Address()".onException(DontQuit): + ip4.address_v4[n] = dgts[n].parseUInt().uint8 + continue + return err() + ok(move ip4) + + +proc toIp6Address(s: string): Opt[IpAddress] = + ## Parse IPv6 address string + ## + # Make sure that `nibbles.len` == 8 + var xDgts = s.split(':') + if xDgts.len < 3 or 8 < xDgts.len: + return err() + # Take care of shortcuts like "::ffff:6366:d1" or "::1" + var (start, pfxLen) = (0, 0) + if xDgts.len < 8: + # A shortcut for missing zeros must start with "::" + if xDgts[0].len == 0 and xDgts[1].len == 0: + (start, pfxLen) = (2, 8 - xDgts.len) + else: + return err() + + var ip6 = IpAddress(family: IpAddressFamily.IPv6) + for n in start ..< xDgts.len: + if xDgts[n].len != 0: + "toIp6Address()".onException(DontQuit): + let + u16 = xDgts[n].parseHexInt().uint16 + pos = 2 * (pfxLen + n) + ip6.address_v6[pos] = (u16 shr 8).uint8 + ip6.address_v6[pos+1] = (u16 and 255).uint8 + continue + return err() + ok(move ip6) + +# ------------------------------------------------------------------------------ +# Private JSON mixin helpers for decoder +# ------------------------------------------------------------------------------ + +proc readValue( + r: var JsonReader; + v: var chronos.Duration; + ) {.raises: [IOError, SerializationError].} = + let kind = r.tokKind + case kind: + of JsonValueKind.Number: + var u64: uint64 + r.readValue(u64) + v = nanoseconds(cast[int64](u64)) + else: + r.raiseUnexpectedValue("Invalid Duiration value type: " & $kind) + +proc readValue( + r: var JsonReader; + v: var IpAddress; + ) {.raises: [IOError, SerializationError].} = + let kind = r.tokKind + case kind: + of JsonValueKind.String: + var ipString: string + r.readValue(ipString) + if 0 <= ipString.find('.'): + v = ipString.toIp4Address.valueOr: + r.raiseUnexpectedValue("Invalid IPv4 address value: " & $ipString) + else: + v = ipString.toIp6Address.valueOr: + r.raiseUnexpectedValue("Invalid IPv6 address value: " & $ipString) + else: + 
r.raiseUnexpectedValue("Invalid IP address value type: " & $kind) + +proc readValue( + r: var JsonReader; + v: var Port; + ) {.raises: [IOError, SerializationError].} = + let kind = r.tokKind + case kind: + of JsonValueKind.Number: + var u64: uint64 + r.readValue(u64) + if 0xffffu < u64: + r.raiseUnexpectedValue("Invalid Port value: " & $u64) + v = Port(cast[uint16](u64)) + else: + r.raiseUnexpectedValue("Invalid Port value type: " & $kind) + +proc readValue( + r: var JsonReader; + v: var UInt256; + ) {.raises: [IOError, SerializationError].} = + ## Modified copy from `common.chain_config.nim` needed for parsing + ## a `NetworkId` type value. + ## + var (accu, ok) = (0.u256, true) + let kind = r.tokKind + case kind: + of JsonValueKind.Number: + try: + r.customIntValueIt: + accu = accu * 10 + it.u256 + except CatchableError: + ok = false + of JsonValueKind.String: + try: + var (sLen, base) = (0, 10) + r.customStringValueIt: + if ok: + var num = it.fromHex + if base <= num: + ok = false # cannot be larger than base + elif sLen < 2: + if 0 <= num: + accu = accu * base.u256 + num.u256 + elif sLen == 1 and it in {'x', 'X'}: + base = 16 # handle "0x" prefix + else: + ok = false + sLen.inc + elif num < 0: + ok = false # not a hex digit + elif base == 10: + accu = accu * 10 + num.u256 + else: + accu = accu * 16 + num.u256 + except CatchableError: + r.raiseUnexpectedValue("UInt256 string parse error") + else: + r.raiseUnexpectedValue("Invalid UInt256 value type: " & $kind & + " (expect int or hex/int string)") + if not ok: + r.raiseUnexpectedValue("UInt256 parse error") + v = accu + +proc readValue( + r: var JsonReader; + v: var BnRange; + ) {.raises: [IOError, SerializationError].} = + let kind = r.tokKind + case kind: + of JsonValueKind.Object: + var bnPair: BnPair + r.readValue(bnPair) + v = BnRange.new(bnPair.least, bnPair.last) + else: + r.raiseUnexpectedValue("Invalid BnRange value type: " & $kind) + +# 
------------------------------------------------------------------------------ +# Private functions +# ------------------------------------------------------------------------------ + +proc getRecType(s: string; info: static[string]): TraceRecType = + (info & ".getRecType()").onException(DontQuit): + let j = SingleField.decode(s, JsonKind) + return j.kind + TraceRecType(0) + +proc init(T: type; s: string; info: static[string]): T = + (info & ".init()").onException(DontQuit): + var rec = Json.decode(s, JTraceRecord[typeof result.bag]) + return T(recType: rec.kind, bag: rec.bag) + T(nil) + +# ------------------------------------------------------------------------------ +# Public record decoder functions +# ------------------------------------------------------------------------------ + +proc unpack*(line: string): ReplayPayloadRef = + ## Decode a JSON string argument `line` and convert it to an internal object. + ## The function always returns a non-nil value. + ## + const info = "unpack" + + template replayTypeExpr(t: TraceRecType, T: type): untyped = + ## Mixin for `withReplayTypeExpr()` + when t == TraceRecType(0): + return T(recType: TraceRecType(0)) + else: + return T.init(line, info) + + # Big switch for allocating different JSON parsers depending on record type. + line.getRecType(info).withReplayTypeExpr() + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner.nim b/tools/syncer/replay/replay_runner.nim new file mode 100644 index 0000000000..0290fb0c2f --- /dev/null +++ b/tools/syncer/replay/replay_runner.nim @@ -0,0 +1,54 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). 
+# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay runner + +{.push raises:[].} + +import + pkg/chronos, + ./replay_runner/[runner_desc, runner_dispatch], + ./replay_reader + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc runDispatcher*( + runner: ReplayRunnerRef; + reader: ReplayReaderRef; + stopIf: ReplayStopIfFn; + endUp: ReplayEndUpFn; + ) {.async: (raises: []).} = + block body: + for w in reader.records(): + # Dispatch next instruction record + await runner.dispatch(w) + + # Can continue? + if stopIf(): + break body + + # Wait for optional task switch + try: await sleepAsync replayWaitForCompletion + except CancelledError: break + + # Can continue? + if stopIf(): + break body + + # Finish + await runner.dispatchEnd() + + endUp() + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner/runner_desc.nim b/tools/syncer/replay/replay_runner/runner_desc.nim new file mode 100644 index 0000000000..d94223837f --- /dev/null +++ b/tools/syncer/replay/replay_runner/runner_desc.nim @@ -0,0 +1,89 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Replay runner +## + +{.push raises:[].} + +import + std/tables, + pkg/chronos, + ../../../../execution_chain/networking/p2p, + ../../../../execution_chain/sync/wire_protocol, + ../../../../execution_chain/sync/beacon/worker/worker_desc, + ../../trace/trace_desc, + ../replay_desc + +export + replay_desc + +const + replayWaitForCompletion* = chronos.nanoseconds(100) + ## Wait for other pseudo/async thread to have completed something + + replayFailTimeout* = chronos.seconds(50) + ## Bail out after waiting this long for an event to happen. This + ## timeout should cover the maximum time needed to import a block. + + replayFailTmoMinLog* = chronos.milliseconds(1) + ## Log maximum elapsed time when it exceeds this threshold. + + replayWaitMuted* = chronos.milliseconds(200) + ## Some handlers are muted, but keep them in a waiting loop so + ## the system can terminate + +type + ReplayWaitError* = tuple + ## Capture exception or error context for waiting/polling instance + excp: BeaconErrorType + name: string + msg: string + + # --------- internal context types --------- + + ReplayBuddyRef* = ref object of BeaconBuddyRef + ## Replacement of `BeaconBuddyRef` in `runPeer()` and `runPool()` + isNew*: bool ## Set in `getOrNewPeer()` when created + run*: ReplayRunnerRef ## Back-reference for convenience + frameID*: Opt[uint] ## Begin/end frame + message*: ReplayPayloadRef ## Data message channel + + ReplayDaemonRef* = ref object + ## Daemeon job frame (similar to `ReplayBuddyRef`) + run*: ReplayRunnerRef ## Back-reference for convenience + frameID*: Opt[uint] ## Begin/end frame + message*: ReplayPayloadRef ## Data message channel + + # --------- + + ReplayEthState* = object + ## Some feake settings to pretent eth/xx compatibility + capa*: Dispatcher ## Cabability `eth68`, `eth69`, etc. 
+ prots*: array[MAX_PROTOCOLS,RootRef] ## `capa` init flags, protocol states + + ReplayRunnerRef* = ref object of ReplayRef + # Global state + ethState*: ReplayEthState ## For ethxx compatibility + stopRunner*: bool ## Shut down request + nSessions*: int ## Numer of sessions left + + # Local state + daemon*: ReplayDaemonRef ## Currently active daemon, or `nil` + peers*: Table[Hash,ReplayBuddyRef] ## Begin/End for base frames + nPeers*: uint ## Track active peer instances + failTmoMax*: chronos.Duration ## Keep track of largest timeout + + # Instruction handling + instrNumber*: uint ## Instruction counter + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner/runner_dispatch.nim b/tools/syncer/replay/replay_runner/runner_dispatch.nim new file mode 100644 index 0000000000..b86c288658 --- /dev/null +++ b/tools/syncer/replay/replay_runner/runner_dispatch.nim @@ -0,0 +1,102 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Replay runner + +{.push raises:[].} + +import + pkg/[chronicles, chronos], + ../../../../execution_chain/networking/p2p, + ./runner_desc, + ./runner_dispatch/[dispatch_blocks, dispatch_headers, dispatch_sched, + dispatch_sync, dispatch_version] + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc dispatch*( + run: ReplayRunnerRef; + pyl: ReplayPayloadRef; + ) {.async: (raises: []).} = + ## Execute the internal capture object argument `pyl` as an instruction. + ## + run.instrNumber.inc + + trace "+dispatch()", n=run.instrNumber, recType=pyl.recType, + nBuddies=run.peers.len, nDaemons=(if run.daemon.isNil: 0 else: 1) + + case pyl.recType: + of TraceRecType(0): + warn "dispatch(): Oops, unexpected void record", n=run.instrNumber + + of VersionInfo: + run.versionInfoWorker(pyl.ReplayVersionInfo) + + of SyncActvFailed: + run.syncActvFailedWorker(pyl.ReplaySyncActvFailed) + of SyncActivated: + run.syncActivateWorker(pyl.ReplaySyncActivated) + of SyncHibernated: + run.syncSuspendWorker(pyl.ReplaySyncHibernated) + + # Simple scheduler single run (no begin/end) functions + of SchedStart: + run.schedStartWorker(pyl.ReplaySchedStart) + of SchedStop: + run.schedStopWorker(pyl.ReplaySchedStop) + of SchedPool: + run.schedPoolWorker(pyl.ReplaySchedPool) + + # Workers, complex run in background + of SchedDaemonBegin: + await run.schedDaemonBegin(pyl.ReplaySchedDaemonBegin) + of SchedDaemonEnd: + await run.schedDaemonEnd(pyl.ReplaySchedDaemonEnd) + of SchedPeerBegin: + await run.schedPeerBegin(pyl.ReplaySchedPeerBegin) + of SchedPeerEnd: + await run.schedPeerEnd(pyl.ReplaySchedPeerEnd) + + # Leaf handlers providing input data to background tasks `runDaemon()` + # and/or `runPeer()`. 
+ of FetchHeaders: + await run.sendHeaders(pyl.ReplayFetchHeaders) + of SyncHeaders: + await run.sendHeaders(pyl.ReplaySyncHeaders) + + of FetchBodies: + await run.sendBodies(pyl.ReplayFetchBodies) + of SyncBodies: + await run.sendBodies(pyl.ReplaySyncBodies) + + of ImportBlock: + await run.sendBlock(pyl.ReplayImportBlock) + of SyncBlock: + await run.sendBlock(pyl.ReplaySyncBlock) + + trace "-dispatch()", n=run.instrNumber, recType=pyl.recType, + nBuddies=run.peers.len, nDaemons=(if run.daemon.isNil: 0 else: 1) + + +proc dispatchEnd*( + run: ReplayRunnerRef; + ) {.async: (raises: []).} = + # Finish + run.instrNumber.inc + info "End replay", n=run.instrNumber + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_blocks.nim b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_blocks.nim new file mode 100644 index 0000000000..eb94e5f937 --- /dev/null +++ b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_blocks.nim @@ -0,0 +1,233 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Replay runner + +{.push raises:[].} + +import + pkg/[chronicles, chronos, eth/common, stew/interval_set], + ../../../../../execution_chain/sync/wire_protocol, + ../runner_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc toBnRange( + ctx: BeaconCtxRef; + lst: openArray[Hash32]; + info: static[string]; + ): BnRange = + ## Resolve block hashes as interval of block numbers + let rs = BnRangeSet.init() + for w in lst: + let h = ctx.hdrCache.get(w).valueOr: + raiseAssert info & ": Cannot resolve" & + ", hash=" & w.short + if rs.merge(h.number,h.number) != 1: + raiseAssert info & ": dulplicate hash" & + ", hash=" & w.short & ", number=" & h.bnStr + rs.ge().expect "valid BnRange" + +proc bnStr( + lst: openArray[Hash32]; + buddy: BeaconBuddyRef; + info: static[string]; + ): string = + buddy.ctx.toBnRange(lst, info).bnStr + +proc toStr(e: BeaconError; anyTime = false): string = + "(" & $e[0] & + "," & $e[1] & + "," & $e[2] & + "," & (if anyTime: "*" else: e[3].toStr) & + ")" + +# ---------------- + +func getResponse( + instr: ReplayFetchBodies; + ): Result[FetchBodiesData,BeaconError] = + if instr.bag.fetched.isSome(): + ok(instr.bag.fetched.value) + elif instr.bag.error.isSome(): + err(instr.bag.error.value) + else: + err((ENoException,"","Missing fetch bodies return code",Duration())) + +func getResponse( + instr: ReplayImportBlock; + ): Result[Duration,BeaconError] = + if instr.bag.elapsed.isSome(): + ok(instr.bag.elapsed.value) + elif instr.bag.error.isSome(): + err(instr.bag.error.value) + else: + err((ENoException,"","Missing block import return code",Duration())) + +func getBeaconError(e: ReplayWaitError): BeaconError = + (e[0], e[1], e[2], Duration()) + +# ------------------------------------------------------------------------------ +# Public dispatcher 
handlers +# ------------------------------------------------------------------------------ + +proc fetchBodiesHandler*( + buddy: BeaconBuddyRef; + req: BlockBodiesRequest; + ): Future[Result[FetchBodiesData,BeaconError]] + {.async: (raises: []).} = + const info = "&fetchBodies" + let buddy = ReplayBuddyRef(buddy) + + var data: ReplayFetchBodies + buddy.withInstr(typeof data, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + if req != instr.bag.req: + raiseAssert info & ": arguments differ" & + ", serial=" & $instr.bag.serial & + ", peer=" & $buddy.peer & + # ----- + ", nBlockHashes=" & $req.blockHashes.len & + ", expected=" & $instr.bag.ivReq.len & + # ----- + ", blockHashes=" & req.blockHashes.bnStr(buddy, info) & + ", expected=" & instr.bag.ivReq.bnStr + data = instr + + buddy.withInstr(ReplaySyncBodies, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + discard # no-op, visual alignment + + return data.getResponse() + + +proc importBlockHandler*( + buddy: BeaconBuddyRef; + ethBlock: EthBlock; + effPeerID: Hash; + ): Future[Result[Duration,BeaconError]] + {.async: (raises: []).} = + const info = "&importBlock" + + let + buddy = ReplayBuddyRef(buddy) + n = buddy.iNum + peer = buddy.peerStr + peerID = buddy.peerIdStr + + var data: ReplayImportBlock + buddy.withInstr(typeof data, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + + if effPeerID != instr.bag.effPeerID: + raiseAssert info & ": eff. 
peer arguments differ" & + ", n=" & $n & + ", serial=" & $instr.bag.serial & + ", peer=" & $peer & + ", peerID=" & $peerID & + ", ethBlock=" & ethBlock.bnStr & + # ----- + ", effPeerID=" & effPeerID.short & + ", expected=" & instr.bag.effPeerID.short + + if ethBlock != instr.bag.ethBlock: + raiseAssert info & ": block arguments differ" & + ", n=" & $n & + ", serial=" & $instr.bag.serial & + ", peer=" & $peer & + ", peerID=" & $peerID & + ", effPeerID=" & effPeerID.short & + # ----- + ", ethBlock=" & ethBlock.bnStr & + ", expected=%" & instr.bag.ethBlock.bnStr & + # ----- + ", ethBlock=%" & ethBlock.computeRlpHash.short & + ", expected=%" & instr.bag.ethBlock.computeRlpHash.short + data = instr + + let run = buddy.run + if not run.fakeImport: + let rc = await run.backup.importBlock(buddy, ethBlock, effPeerID) + if rc.isErr or data.bag.error.isSome(): + const info = info & ": result values differ" + let serial = data.bag.serial + if rc.isErr and data.bag.error.isNone(): + warn info, n, serial, peer, peerID, + got="err" & rc.error.toStr, expected="ok" + elif rc.isOk and data.bag.error.isSome(): + warn info, n, serial, peer, peerID, + got="ok", expected="err" & data.bag.error.value.toStr(true) + elif rc.error.excp != data.bag.error.value.excp or + rc.error.msg != data.bag.error.value.msg: + warn info, n, serial, peer, peerID, + got="err" & rc.error.toStr, + expected="err" & data.bag.error.value.toStr(true) + + buddy.withInstr(ReplaySyncBlock, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? 
+ discard # no-op, visual alignment + + return data.getResponse() + +# ------------------------------------------------------------------------------ +# Public functions, data feed +# ------------------------------------------------------------------------------ + +proc sendBodies*( + run: ReplayRunnerRef; + instr: ReplayFetchBodies|ReplaySyncBodies; + ) {.async: (raises: []).} = + ## Stage bodies request/response data + const info = instr.replayLabel() + let buddy = run.getPeer(instr, info).valueOr: + raiseAssert info & ": getPeer() failed" & + ", n=" & $run.iNum & + ", serial=" & $instr.bag.serial & + ", peerID=" & instr.bag.peerCtx.value.peerID.short + discard buddy.pushInstr(instr, info) + +proc sendBlock*( + run: ReplayRunnerRef; + instr: ReplayImportBlock|ReplaySyncBlock; + ) {.async: (raises: []).} = + ## Stage block request/response data + const info = instr.replayLabel() + if instr.bag.peerCtx.isSome(): + # So it was captured run from a sync peer + let buddy = run.getPeer(instr, info).valueOr: + raiseAssert info & ": getPeer() failed" & + ", n=" & $run.iNum & + ", serial=" & $instr.bag.serial & + ", peerID=" & instr.bag.peerCtx.value.peerID.short + discard buddy.pushInstr(instr, info) + + # Verify that the daemon is properly initialised + elif run.daemon.isNil: + raiseAssert info & ": system error (no daemon)" & + ", n=" & $run.iNum & + ", serial=" & $instr.bag.serial & + ", peer=n/a" + + else: + discard run.daemon.pushInstr(instr, info) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_headers.nim b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_headers.nim new file mode 100644 index 0000000000..0a974f126c --- /dev/null +++ b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_headers.nim @@ -0,0 +1,111 @@ +# Nimbus +# Copyright (c) 2025 Status 
Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay runner + +{.push raises:[].} + +import + pkg/[chronicles, chronos, eth/common], + ../../../../../execution_chain/sync/wire_protocol, + ../runner_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc `==`(a,b: BlockHeadersRequest): bool = + if a.maxResults == b.maxResults and + a.skip == b.skip: + if a.startBlock.isHash: + if b.startBlock.isHash and + a.startBlock.hash == b.startBlock.hash: + return true + else: + if not b.startBlock.isHash and + a.startBlock.number == b.startBlock.number: + return true + +func getResponse( + instr: ReplayFetchHeaders; + ): Result[FetchHeadersData,BeaconError] = + if instr.bag.fetched.isSome(): + ok(instr.bag.fetched.value) + elif instr.bag.error.isSome(): + err(instr.bag.error.value) + else: + err((ENoException,"","Missing fetch headers return code",Duration())) + +func getBeaconError(e: ReplayWaitError): BeaconError = + (e[0], e[1], e[2], Duration()) + +# ------------------------------------------------------------------------------ +# Public dispatcher handlers +# ------------------------------------------------------------------------------ + +proc fetchHeadersHandler*( + buddy: BeaconBuddyRef; + req: BlockHeadersRequest; + ): Future[Result[FetchHeadersData,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `getBlockHeaders()` handler. 
+ const info = "&fetchHeaders" + let buddy = ReplayBuddyRef(buddy) + + var data: ReplayFetchHeaders + buddy.withInstr(typeof data, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + if req != instr.bag.req: + raiseAssert info & ": arguments differ" & + ", n=" & $buddy.iNum & + ", serial=" & $instr.bag.serial & + ", peer=" & $buddy.peer & + # ----- + ", reverse=" & $req.reverse & + ", expected=" & $instr.bag.req.reverse & + # ----- + ", reqStart=" & req.startBlock.toStr & + ", expected=" & instr.bag.req.startBlock.toStr & + # ----- + ", reqLen=" & $req.maxResults & + ", expected=" & $instr.bag.req.maxResults + data = instr + + buddy.withInstr(ReplaySyncHeaders, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + discard # no-op, visual alignment + + return data.getResponse() + +# ------------------------------------------------------------------------------ +# Public functions, data feed +# ------------------------------------------------------------------------------ + +proc sendHeaders*( + run: ReplayRunnerRef; + instr: ReplayFetchHeaders|ReplaySyncHeaders; + ) {.async: (raises: []).} = + ## Stage headers request/response data + const info = instr.replayLabel() + let buddy = run.getPeer(instr, info).valueOr: + raiseAssert info & ": getPeer() failed" & + ", n=" & $run.iNum & + ", serial=" & $instr.bag.serial + discard buddy.pushInstr(instr, info) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_helpers.nim b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_helpers.nim new file mode 100644 index 0000000000..abdf674ec4 --- /dev/null +++ b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_helpers.nim @@ -0,0 +1,818 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH 
+# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay runner + +{.push raises:[].} + +import + std/[net, tables], + pkg/[chronos, chronicles, stew/interval_set], + ../../../../../execution_chain/networking/[p2p, p2p_peers], + ../../../../../execution_chain/sync/wire_protocol, + ../../../../../execution_chain/sync/beacon/worker/helpers as worker_helpers, + ../../../trace/trace_setup/setup_helpers as trace_helpers, + ../runner_desc + +export + trace_helpers.idStr, + trace_helpers.short, + worker_helpers + +logScope: + topics = "replay runner" + +type + ReplayWaitResult* = Result[void,ReplayWaitError] + + ReplayInstance = ReplayDaemonRef | ReplayBuddyRef + + ReplayMsgInstrType = ReplayFetchHeaders | ReplaySyncHeaders | + ReplayFetchBodies | ReplaySyncBodies | + ReplayImportBlock | ReplaySyncBlock + + ReplayPeerInstrType = ReplaySchedPeerBegin | ReplaySchedPeerEnd | + ReplaySchedStart | ReplaySchedStop | + ReplaySchedPool + + ReplaySchedInstrType = ReplaySchedDaemonBegin | ReplaySchedDaemonEnd | + ReplayPeerInstrType + + ReplayAnyInstrType = ReplayVersionInfo | ReplaySyncActvFailed | + ReplaySyncActivated | ReplaySyncHibernated | + ReplaySchedInstrType | + ReplayMsgInstrType + +# ------------------------------------------------------------------------------ +# Private helper(s) +# ------------------------------------------------------------------------------ + +template waitForConditionImpl( + run: ReplayRunnerRef; + info: static[string]; + cond: untyped; + ): ReplayWaitResult = + ## Async/template + ## + ## Wait until the condition `cond()` becomes`true`. 
If a `stopRunner` flag + ## is becomes `true`, this wait function function returns `err(..)`, and + ## `ok()` otherwise. + ## + var bodyRc = ReplayWaitResult.ok() + block body: + let + start = Moment.now() + n {.inject.} = run.instrNumber + var + tmoPending {.inject.} = false + count {.inject.} = 0 + + while true: + count.inc + + if run.stopRunner: + chronicles.info info & ": runner stopped", n + bodyRc = ReplayWaitResult.err((ENoException,"",info&": runner stopped")) + break body # no timeout logging + + if cond: + break + + if tmoPending: + error info & ": timeout -- STOP", n, elapsed=(Moment.now()-start).toStr + run.stopRunner = true + bodyRc = ReplayWaitResult.err((ECancelledError,"",info&": timeout")) + break + + try: + await sleepAsync replayWaitForCompletion + except CancelledError as e: + chronicles.info info & ": cancelled -- STOP", n + run.stopRunner = true + bodyRc = ReplayWaitResult.err((ECancelledError,$e.name,e.msg)) + break + + if replayFailTimeout < Moment.now() - start: + tmoPending = true + + # End `while()` + + # Log maximum waiting time + let ela = Moment.now() - start + if run.failTmoMax < ela: + if replayFailTmoMinLog <= ela: + debug info & ": max waiting time", n, + elaMax=ela.toStr, lastElaMax=run.failTmoMax.toStr + run.failTmoMax = ela + + # End block: `body` + + bodyRc # result + + +func syncedEnvCondImpl( + desc: ReplayInstance; + instr: ReplayAnyInstrType; + info: static[string]; + ): bool = + ## Condition function for `waitForConditionImpl()` for synchronising state. 
+ ## + let ctx = desc.run.ctx + + if instr.bag.hdrUnpr.isSome(): + if instr.bag.hdrUnpr.value.hChunks != ctx.hdr.unprocessed.chunks().uint: + return false + if 0 < instr.bag.hdrUnpr.value.hChunks: + if instr.bag.hdrUnpr.value.hLen != ctx.hdr.unprocessed.total(): + return false + let iv = ctx.hdr.unprocessed.le().expect "valid iv" + if instr.bag.hdrUnpr.value.hLast != iv.maxPt or + instr.bag.hdrUnpr.value.hLastLen != iv.len: + return false + if instr.bag.antecedent != ctx.hdrCache.antecedent.number: + return false + + if instr.bag.blkUnpr.isSome(): + if instr.bag.blkUnpr.value.bChunks != ctx.blk.unprocessed.chunks().uint: + return false + if 0 < instr.bag.blkUnpr.value.bChunks: + if instr.bag.blkUnpr.value.bLen != ctx.blk.unprocessed.total(): + return false + let iv = ctx.blk.unprocessed.ge().expect "valid iv" + if instr.bag.blkUnpr.value.bLeast != iv.minPt or + instr.bag.blkUnpr.value.bLeastLen != iv.len: + return false + + return true + + +proc newPeerImpl( + run: ReplayRunnerRef; + instr: ReplayPeerInstrType; + info: static[string]; + ): Opt[ReplayBuddyRef] = + ## Register a new peer. 
+ ## + if instr.bag.peerCtx.isNone(): + warn info & ": missing peer ctx", n=run.instrNumber, serial=instr.bag.serial + return err() + + run.peers.withValue(instr.bag.peerCtx.value.peerID, val): + warn info & ": peer exists already", n=run.instrNumber, + serial=instr.bag.serial, peer=($val.peer) + val.isNew = false + return ok(val[]) + + var buddy = ReplayBuddyRef( + isNew: true, + run: run, + ctx: run.ctx, + only: BeaconBuddyData( + nRespErrors: (instr.bag.peerCtx.value.nHdrErrors, + instr.bag.peerCtx.value.nBlkErrors)), + peerID: instr.bag.peerCtx.value.peerID, + peer: Peer( + dispatcher: run.ethState.capa, + peerStates: run.ethState.prots, + remote: Node( + node: ENode( + address: enode.Address( + ip: instr.bag.peerIP, + tcpPort: instr.bag.peerPort, + udpPort: instr.bag.peerPort))))) + + run.peers[instr.bag.peerCtx.value.peerID] = buddy + return ok(move buddy) + +# ------------------------------------------------------------------------------ +# Private functions, environment checkers +# ------------------------------------------------------------------------------ + +proc baseStatesDifferImpl( + desc: ReplayRunnerRef|ReplayInstance; + instr: ReplayAnyInstrType; + ignLatestNum: static[bool]; + info: static[string]; + ): bool = + when desc is ReplayRunnerRef: + let (run, peer) = (desc, "n/a") + when desc is ReplayDaemonRef: + let (run, peer) = (desc.run, "n/a") + when desc is ReplayBuddyRef: + let (run, peer) = (desc.run, desc.peer) + + let + ctx = run.ctx + n = run.instrNumber + serial = instr.bag.serial + var + statesDiffer = false + + if serial != n: + statesDiffer = true + info info & ": serial numbers differ", n, peer, serial, expected=n + + if ctx.chain.baseNumber != instr.bag.baseNum: + statesDiffer = true + info info & ": base blocks differ", n, serial, peer, + base=instr.bag.baseNum.bnStr, expected=ctx.chain.baseNumber.bnStr + + when not ignLatestNum: + if ctx.chain.latestNumber != instr.bag.latestNum: + statesDiffer = true + info info & ": latest blocks 
differ", n, serial, peer, + latest=instr.bag.latestNum.bnStr, expected=ctx.chain.latestNumber.bnStr + + if ctx.pool.lastState != instr.bag.syncState: + statesDiffer = true + info info & ": sync states differ", n, serial, peer, + state=ctx.pool.lastState, expected=instr.bag.syncState + + if ctx.hdrCache.state != instr.bag.chainMode: + statesDiffer = true + info info & ": header chain modes differ", n, serial, peer, + chainMode=ctx.hdrCache.state, expected=instr.bag.chainMode + elif instr.bag.chainMode in {collecting,ready,orphan} and + instr.bag.antecedent != ctx.hdrCache.antecedent.number: + statesDiffer = true + info info & ": header chain antecedents differ", n, serial, peer, + antecedent=ctx.hdrCache.antecedent.bnStr, + expected=instr.bag.antecedent.bnStr + + if ctx.pool.nBuddies != instr.bag.nPeers.int: + statesDiffer = true + info info & ": number of active peers differs", n, serial, peer, + nBuddies=ctx.pool.nBuddies, expected=instr.bag.nPeers + + if ctx.poolMode != instr.bag.poolMode: + statesDiffer = true + info info & ": pool modes/reorgs differ", n, serial, peer, + poolMode=ctx.poolMode, expected=instr.bag.poolMode + + return statesDiffer + + +proc unprocListsDifferImpl( + desc: ReplayRunnerRef|ReplayInstance; + instr: ReplayAnyInstrType; + info: static[string]; + ): bool = + when desc is ReplayRunnerRef: + let (run, peer) = (desc, "n/a") + when desc is ReplayDaemonRef: + let (run, peer) = (desc.run, "n/a") + when desc is ReplayBuddyRef: + let (run, peer) = (desc.run, desc.peer) + + let + ctx = run.ctx + n = run.instrNumber + serial = instr.bag.serial + var + statesDiffer = false + + # Unprocessed block numbers for header + if instr.bag.hdrUnpr.isSome(): + if instr.bag.hdrUnpr.value.hChunks != ctx.hdr.unprocessed.chunks().uint: + statesDiffer = true + info info & ": unproc headers lists differ", n, serial, peer, + listChunks=ctx.hdr.unprocessed.chunks(), + expected=instr.bag.hdrUnpr.value.hChunks + if 0 < instr.bag.hdrUnpr.value.hChunks: + if 
instr.bag.hdrUnpr.value.hLen != ctx.hdr.unprocessed.total(): + statesDiffer = true + info info & ": unproc headers lists differ", n, serial, peer, + listLen=ctx.hdr.unprocessed.total(), + expected=instr.bag.hdrUnpr.value.hLen + let iv = ctx.hdr.unprocessed.le().expect "valid iv" + if instr.bag.hdrUnpr.value.hLastLen != iv.len: + statesDiffer = true + info info & ": unproc headers lists differ", n, serial, peer, + lastIvLen=iv.len, expected=instr.bag.hdrUnpr.value.hLastLen + if instr.bag.hdrUnpr.value.hLast != iv.maxPt: + statesDiffer = true + info info & ": unproc headers lists differ", n, serial, peer, + lastIvMax=iv.maxPt, expected=instr.bag.hdrUnpr.value.hLast + + # Unprocessed block numbers for blocks + if instr.bag.blkUnpr.isSome(): + if instr.bag.blkUnpr.value.bChunks != ctx.blk.unprocessed.chunks().uint: + statesDiffer = true + info info & ": unproc blocks lists differ", n, serial, peer, + listChunks=ctx.blk.unprocessed.chunks(), + expected=instr.bag.blkUnpr.value.bChunks + if 0 < instr.bag.blkUnpr.value.bChunks: + if instr.bag.blkUnpr.value.bLen != ctx.blk.unprocessed.total(): + statesDiffer = true + info info & ": unproc blocks lists differ", n, serial, peer, + listLen=ctx.blk.unprocessed.total(), + expected=instr.bag.blkUnpr.value.bLen + let iv = ctx.blk.unprocessed.ge().expect "valid iv" + if instr.bag.blkUnpr.value.bLeastLen != iv.len: + statesDiffer = true + info info & ": unproc blocks lists differ", n, serial, peer, + lastIvLen=iv.len, expected=instr.bag.blkUnpr.value.bLeastLen + if instr.bag.blkUnpr.value.bLeast != iv.minPt: + statesDiffer = true + info info & ": unproc blocks lists differ", n, serial, peer, + lastIvMax=iv.maxPt, expected=instr.bag.blkUnpr.value.bLeast + + return statesDiffer + + +proc peerStatesDifferImpl( + buddy: ReplayBuddyRef; + instr: ReplayAnyInstrType; + info: static[string]; + ): bool = + let + peer = buddy.peer + n = buddy.run.instrNumber + serial = instr.bag.serial + var + statesDiffer = false + + if 
instr.bag.peerCtx.isNone(): + statesDiffer = true + info info & ": peer ctx values differ", n, serial, peer, ctx="n/a" + else: + if instr.bag.peerCtx.value.peerCtrl != buddy.ctrl.state: + statesDiffer = true + info info & ": peer ctrl states differ", n, serial, peer, + ctrl=buddy.ctrl.state, expected=instr.bag.peerCtx.value.peerCtrl + + if instr.bag.peerCtx.value.nHdrErrors != buddy.only.nRespErrors.hdr: + statesDiffer = true + info info & ": peer header errors differ", n, serial, peer, + nHdrErrors=buddy.only.nRespErrors.hdr, + expected=instr.bag.peerCtx.value.nHdrErrors + + if instr.bag.peerCtx.value.nBlkErrors != buddy.only.nRespErrors.blk: + statesDiffer = true + info info & ": peer body errors differ", n, serial, peer, + nBlkErrors=buddy.only.nRespErrors.blk, + expected=instr.bag.peerCtx.value.nBlkErrors + + return statesDiffer + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +func iNum*(desc: ReplayInstance|ReplayRunnerRef): uint = + when desc is ReplayRunnerRef: + desc.instrNumber + else: + desc.run.instrNumber + +func toStr*(w: BlockHashOrNumber): string = + if w.isHash: w.hash.short else: w.number.bnStr + +func peerStr*(desc: ReplayInstance): string = + when desc is ReplayBuddyRef: + $desc.peer + elif desc is ReplayDaemonRef: + "n/a" + +func peerIdStr*(desc: ReplayInstance): string = + when desc is ReplayBuddyRef: + desc.peerID.short + elif desc is ReplayDaemonRef: + "n/a" + +func frameIdStr*(instr: ReplaySchedInstrType): string = + if instr.bag.frameID.isSome(): + instr.bag.frameID.value.idStr + else: + "n/a" + +func frameIdStr*(desc: ReplayBuddyRef|ReplayDaemonRef): string = + if desc.frameID.isSome(): + desc.frameID.value.idStr + else: + "n/a" + +# ----------------- + +proc stopError*(run: ReplayRunnerRef; info: static[string]) = + error info & " -- STOP", n=run.instrNumber + run.stopRunner = false + +proc 
stopOk*(run: ReplayRunnerRef; info: static[string]) = + info info & " -- STOP", n=run.instrNumber + run.stopRunner = false + +# ----------------- + +proc checkSyncerState*( + desc: ReplayRunnerRef|ReplayInstance; + instr: ReplayAnyInstrType; + ignLatestNum: static[bool]; + info: static[string]; + ): bool + {.discardable.} = + ## Check syncer states against all captured state variables of the + ## `instr` argument. + var statesDiffer = false + + if desc.baseStatesDifferImpl(instr, ignLatestNum, info): + statesDiffer = true + + if desc.unprocListsDifferImpl(instr, info): + statesDiffer = true + + when desc is ReplayBuddyRef: + if desc.peerStatesDifferImpl(instr, info): + statesDiffer = true + + return statesDiffer + +proc checkSyncerState*( + desc: ReplayRunnerRef|ReplayInstance; + instr: ReplayAnyInstrType; + info: static[string]; + ): bool + {.discardable.} = + desc.checkSyncerState(instr, false, info) + +# ------------------------------------------------------------------------------ +# Public functions, peer/daemon descriptor management +# ------------------------------------------------------------------------------ + +proc getPeer*( + run: ReplayRunnerRef; + instr: ReplayPeerInstrType|ReplayMsgInstrType; + info: static[string]; + ): Opt[ReplayBuddyRef] = + ## Get peer from peers table (if any) + if instr.bag.peerCtx.isNone(): + warn info & ": missing peer ctx", n=run.iNum, serial=instr.bag.serial + else: + run.peers.withValue(instr.bag.peerCtx.value.peerID, buddy): + return ok(buddy[]) + debug info & ": no peer", n=run.iNum, serial=instr.bag.serial, + peerID=instr.bag.peerCtx.value.peerID.short + return err() + + +proc newPeer*( + run: ReplayRunnerRef; + instr: ReplaySchedStart; + info: static[string]; + ): Opt[ReplayBuddyRef] = + ## Register a new peer. 
+ ## + return run.newPeerImpl(instr, info) + + +proc getOrNewPeerFrame*( + run: ReplayRunnerRef; + instr: ReplayPeerInstrType; + info: static[string]; + ): Opt[ReplayBuddyRef] = + ## Get an existing one or register a new peer and set up `stage[0]`. + ## + if instr.bag.peerCtx.isNone(): + return err() + + var buddy: ReplayBuddyRef + run.peers.withValue(instr.bag.peerCtx.value.peerID, val): + buddy = val[] + buddy.isNew = false + do: + buddy = run.newPeerImpl(instr, info).expect "valid peer" + + if buddy.frameID.isSome(): + warn info & ": peer frameID unexpected", n=buddy.iNum, + serial=instr.bag.serial, frameID=buddy.frameIdStr, expected="n/a" + if instr.bag.frameID.isNone(): + warn info & ": peer instr frameID missing", n=buddy.iNum, + serial=instr.bag.serial, frameID="n/a" + + buddy.frameID = instr.bag.frameID + return ok(move buddy) + + +proc delPeer*( + buddy: ReplayBuddyRef; + info: static[string]; + ) = + ## Delete peer ID from registry and return the environment for the + ## deleted peer ID. + ## + let run = buddy.run + if run.peers.hasKey(buddy.peerID): + run.peers.del buddy.peerID + else: + trace info & ": stale peer ignored", n=buddy.iNum, + peer=($buddy.peer), peerID=buddy.peerID.short + +# ----------------- + +proc getDaemon*( + run: ReplayRunnerRef; + info: static[string]; + ): Opt[ReplayDaemonRef] = + ## Similar to `getPeer()` for daemon + if not run.daemon.isNil: + return ok(run.daemon) + + warn info & ": no daemon", n=run.instrNumber + return err() + + +proc newDaemonFrame*( + run: ReplayRunnerRef; + instr: ReplaySchedDaemonBegin; + info: static[string]; + ): Opt[ReplayDaemonRef] = + ## Similar to `getOrNewPeerFrame()` for daemon. 
+ if run.daemon.isNil: + if instr.bag.frameID.isNone(): + warn info & ": daemon instr frameID missing", n=run.iNum, + serial=instr.bag.serial, frameID="n/a" + run.daemon = ReplayDaemonRef( + run: run, + frameID: instr.bag.frameID) + return ok(run.daemon) + + warn info & ": daemon already registered", n=run.iNum, + serial=instr.bag.serial, frameID=instr.frameIdStr + return err() + + +proc delDaemon*( + daemon: ReplayDaemonRef; + info: static[string]; + ) = + ## Similar to `delPeer()` for daemon + let run = daemon.run + if run.daemon.isNil: + trace info & ": stale daemon ignored", n=run.instrNumber + else: + run.daemon = ReplayDaemonRef(nil) + +# ------------------------------------------------------------------------------ +# Public functions, process/handler synchronisation +# ------------------------------------------------------------------------------ + +proc waitForSyncedEnv*( + desc: ReplayInstance; + instr: ReplaySchedInstrType ; + info: static[string]; + ): Future[ReplayWaitResult] + {.async: (raises: []).} = + ## .. + ## + when desc is ReplayBuddyRef: + # The scheduler (see `sync_sched.nim`) might have disconnected the peer + # already as is captured in the instruction environment. This does not + # apply to `zombie` settings which will be done by the application. 
+ if instr.bag.peerCtx.isNone(): + warn info & ": missing peer ctx", n=desc.iNum, serial=instr.bag.serial + return err((ENoException,"",info&": missing peer ctx")) + if instr.bag.peerCtx.value.peerCtrl == Stopped and not desc.ctrl.stopped: + desc.ctrl.stopped = true + + let + serial {.inject,used.} = instr.bag.serial + peer {.inject,used.} = desc.peerStr + peerID {.inject,used.} = desc.peerIdStr + + trace info & ": process to be synced", n=desc.iNum, serial, peer, peerID + + let rc = desc.run.waitForConditionImpl(info): + if tmoPending: + debug info & ": syncing", n=desc.iNum, serial, peer, count + desc.checkSyncerState(instr, info & ", syncing") + desc.syncedEnvCondImpl(instr, info) # cond result + + if rc.isErr(): + # Shutdown? + trace info & ": process sync error", n=desc.iNum, + serial, peer, peerID, name=rc.error.name, msg=rc.error.msg + return err(rc.error) + + trace info & ": process synced ok", n=desc.iNum, serial, peer, peerID + desc.checkSyncerState(instr, ignLatestNum=true, info) # relaxed check + + return ok() + +# ------------------ + +proc processFinishedClearFrame*( + desc: ReplayInstance; + instr: ReplaySchedDaemonBegin|ReplaySchedPeerBegin|ReplaySchedPool; + info: static[string]; + ) = + ## Register that the process has finished + ## + # Verify that sub-processes did not change the environment + if desc.frameID != instr.bag.frameID: + warn info & ": frameIDs differ", n=desc.iNum, serial=instr.bag.serial, + peer=desc.peerStr, frameID=desc.frameIdStr, expected=instr.frameIdStr + + # Mark the pocess `done` + desc.frameID = Opt.none(uint) + + trace info & ": terminating", n=desc.iNum, + serial=instr.bag.serial, frameID=instr.frameIdStr, peer=desc.peerStr + + +template whenProcessFinished*( + desc: ReplayInstance; + instr: ReplaySchedDaemonEnd|ReplaySchedPeerEnd; + info: static[string]; + ): ReplayWaitResult = + ## Async/template + ## + var bodyRc = ReplayWaitResult.ok() + block body: + let + peer {.inject,used.} = desc.peerStr + peerID {.inject,used.} 
= desc.peerIdStr + serial {.inject,used.} = instr.bag.serial + + if desc.frameID.isSome(): + doAssert desc.frameID == instr.bag.frameID + + trace info & ": wait for terminated", n=desc.iNum, serial, + frameID=instr.frameIdStr, peer, peerID + + bodyRc = desc.run.waitForConditionImpl(info): + if tmoPending: + debug info & ": wait for terminated", n, serial, peer, count + desc.frameID.isNone() + + if bodyRc.isErr: + break body + + trace info & ": terminated OK", n=desc.iNum, serial, + frameID=instr.frameIdStr, peer + desc.checkSyncerState(instr, ignLatestNum=true, info) # relaxed check + + # Synchronise against captured environment + bodyRc = desc.run.waitForConditionImpl(info): + if tmoPending: + debug info & ": syncing", n=desc.iNum, serial, peer, peerID, count + desc.checkSyncerState(instr, info & ", syncing") + desc.syncedEnvCondImpl(instr, info) # cond result + + if bodyRc.isErr: + break body + + trace info & ": finished", n=desc.iNum, serial, + frameID=instr.frameIdStr, peer, peerID + # End body + + bodyRc # result + +# ------------------ + +template pushInstr*( + desc: ReplayInstance; + instr: ReplayMsgInstrType; + info: static[string]; + ): ReplayWaitResult = + ## Async/template + ## + ## Stage session data, then wait for the background process to consume the + ## session data using `withInstr()`. + ## + var bodyRc = ReplayWaitResult.ok() + block: + # Verify that the stage is based on a proper environment + doAssert desc.frameID.isSome() # this is not `instr.bag.frameID` + + let + peer {.inject,used.} = desc.peerStr + peerID {.inject,used.} = desc.peerIdStr + dataType {.inject.} = instr.recType + serial {.inject.} = instr.bag.serial + + doAssert serial == desc.iNum + doAssert desc.message.isNil + + # Stage/push session data + desc.message = instr + + block body: + # Wait for sync # FIXME, really needed? + bodyRc = desc.run.waitForConditionImpl(info): # FIXME, really needed? 
+ if tmoPending: + debug info & ": syncing", n, serial, peer, peerID, dataType, count + desc.checkSyncerState(instr, info & ", syncing") + desc.syncedEnvCondImpl(instr, info) # cond result + + if bodyRc.isErr(): + break body + + doAssert serial == desc.iNum + + trace info & ": sent data", n=desc.iNum, serial, peer, peerID, dataType + + # Wait for message to be swallowed + bodyRc = desc.run.waitForConditionImpl(info): + if tmoPending: + debug info & ": wait ackn", n, serial, peer, peerID, dataType, count + desc.message.isNil # cond result + + if bodyRc.isErr(): + break body + # End body + + trace info & ": done", n=desc.iNum, serial, peer, peerID, dataType + doAssert desc.iNum == serial + + bodyRc # result + + +template withInstr*( + desc: ReplayInstance; + R: type ReplayMsgInstrType; + info: static[string]; + code: untyped; + ) = + ## Async/template + ## + ## Execude the argument `code` with the data sent by a feeder. The variables + ## and functions available for `code` are: + ## * `instr` -- instr data, available if `instr.isAvailable()` is `true` + ## * `iError` -- error data, initialised if `instr.isAvailable()` is `false` + ## + block: + const dataType {.inject.} = (typeof R().bag).toTraceRecType + + when R is ReplayFetchBodies or + R is ReplaySyncBodies or + R is ReplayImportBlock: + const ignLatestNum = true # relax, de-noise + else: + const ignLatestNum = false + + let + run = desc.run + peer {.inject,used.} = desc.peerStr + peerID {.inject,used.} = desc.peerIdStr + + trace info & ": get data", n=desc.iNum, serial="n/a", peer, dataType + + # Reset flag and wait for staged data to disappear from stack + let rc = run.waitForConditionImpl(info): + if tmoPending: + debug info & ": expecting data", n, serial="n/a", + peer, peerID, dataType, count + not desc.message.isNil # cond result + + var + iError {.inject.}: ReplayWaitError + instr {.inject.}: R + + if rc.isOk(): + instr = R(desc.message) + doAssert desc.message.recType == dataType + doAssert 
instr.bag.serial == desc.iNum + + when desc is ReplayBuddyRef: + # The scheduler (see `sync_sched.nim`) might have disconnected the + # peer already which would be captured in the instruction environment. + # This does not apply to `zombie` settings which will be handled by + # the application `code`. + if instr.bag.peerCtx.isNone(): + warn info & ": missing peer ctx", n=desc.iNum, + serial=instr.bag.serial, peer, peerID, dataType + desc.ctrl.stopped = true + elif instr.bag.peerCtx.value.peerCtrl == Stopped and + not desc.ctrl.stopped: + desc.ctrl.stopped = true + else: + iError = rc.error + + template isAvailable(_: R): bool {.used.} = rc.isOk() + + code + + if rc.isOk(): + doAssert not desc.message.isNil + doAssert desc.message.recType == dataType + doAssert instr.bag.serial == desc.iNum + + desc.checkSyncerState(instr, ignLatestNum, info) + + debug info & ": got data", n=desc.iNum, serial=instr.bag.serial, + peer, peerID, dataType + + desc.message = ReplayPayloadRef(nil) + + discard # no-op, visual alignment + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_sched.nim b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_sched.nim new file mode 100644 index 0000000000..46a0ba50ce --- /dev/null +++ b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_sched.nim @@ -0,0 +1,212 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Replay runner + +{.push raises:[].} + +import + std/tables, + pkg/[chronicles, chronos, eth/common], + ../runner_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Private helper +# ------------------------------------------------------------------------------ + +proc schedDaemonProcessImpl( + daemon: ReplayDaemonRef; + instr: ReplaySchedDaemonBegin; + info: static[string]; + ) {.async: (raises: []).} = + ## Run the task `schedDaemon()`. This function has to be run background + ## process (using `asyncSpawn`.) + ## + let run = daemon.run + trace info & ": begin", n=run.iNum, serial=instr.bag.serial, + frameID=instr.frameIdStr, syncState=instr.bag.syncState + + discard await run.backup.schedDaemon(run.ctx) + daemon.processFinishedClearFrame(instr, info) + + trace info & ": end", n=run.iNum, serial=instr.bag.serial, + frameID=instr.frameIdStr, syncState=instr.bag.syncState + + +proc schedPeerProcessImpl( + buddy: ReplayBuddyRef; + instr: ReplaySchedPeerBegin; + info: static[string]; + ) {.async: (raises: []).} = + ## Run the task `schedPeer()`. This function has to be run background + ## process (using `asyncSpawn`.) 
+ ## + let run = buddy.run + trace info & ": begin", n=run.iNum, serial=instr.bag.serial, + frameID=instr.frameIdStr, peer=($buddy.peer), peerID=buddy.peerID.short, + syncState=instr.bag.syncState + + # Activate peer + buddy.run.nPeers.inc + + discard await run.backup.schedPeer(buddy) + buddy.processFinishedClearFrame(instr, info) + + trace info & ": end", n=run.iNum, serial=instr.bag.serial, + frameID=instr.frameIdStr, peer=($buddy.peer), peerID=buddy.peerID.short, + syncState=instr.bag.syncState + +# ------------------------------------------------------------------------------ +# Public dispatcher handlers +# ------------------------------------------------------------------------------ + +proc schedDaemonBegin*( + run: ReplayRunnerRef; + instr: ReplaySchedDaemonBegin; + ) {.async: (raises: []).} = + ## Run the `schedDaemon()` task. + ## + # Synchronise against captured environment and start process + const info = instr.replayLabel() + let daemon = run.newDaemonFrame(instr, info).valueOr: return + discard await daemon.waitForSyncedEnv(instr, info) + asyncSpawn daemon.schedDaemonProcessImpl(instr, info) + + +proc schedDaemonEnd*( + run: ReplayRunnerRef; + instr: ReplaySchedDaemonEnd; + ) {.async: (raises: []).} = + ## Clean up (in foreground) after `schedDaemon()` process has terminated. + ## + const info = instr.replayLabel() + let daemon = run.getDaemon(info).valueOr: return + daemon.whenProcessFinished(instr, info).isErrOr: + daemon.delDaemon(info) # Clean up + + +proc schedStartWorker*( + run: ReplayRunnerRef; + instr: ReplaySchedStart; + ) = + ## Runs `schedStart()` in the foreground. 
+ ## + const + info = instr.replayLabel() + let + buddy = run.newPeer(instr, info).valueOr: return + accept = run.backup.schedStart(buddy) + + trace info & ": begin", n=run.iNum, serial=instr.bag.serial, + peer=($buddy.peer), peerID=buddy.peerID.short + + if accept != instr.bag.accept: + warn info & ": result argument differs", n=run.iNum, + serial=instr.bag.serial, peer=buddy.peer, expected=instr.bag.accept, + result=accept + + # Syncer state was captured when leaving the `schedStart()` handler. + buddy.checkSyncerState(instr, ignLatestNum=true, info) # relaxed check + + if not accept: + buddy.delPeer(info) # Clean up + + trace info & ": end", n=run.iNum, serial=instr.bag.serial, + peer=($buddy.peer), peerID=buddy.peerID.short + + +proc schedStopWorker*( + run: ReplayRunnerRef; + instr: ReplaySchedStop; + ) = + ## Runs `schedStop()` in the foreground. + ## + const info = instr.replayLabel() + let buddy = run.getOrNewPeerFrame(instr, info).valueOr: return + run.backup.schedStop(buddy) + + # As the `schedStop()` function environment was captured only after the + # syncer was activated, there might still be some unregistered peers hanging + # around. So it is perfectly OK to see the peer for the first time, here + # which has its desciptor sort of unintialised (relative to `instr`.) + if not buddy.isNew: + # Syncer state was captured when leaving the `schedStop()` handler. + if instr.bag.peerCtx.isNone(): + warn info & ": peer ctx missing", n=run.iNum, serial=instr.bag.serial + return + if instr.bag.peerCtx.value.peerCtrl == Stopped and not buddy.ctrl.stopped: + buddy.ctrl.stopped = true + buddy.checkSyncerState(instr, info) + + # Clean up + buddy.delPeer(info) + + trace info & ": done", n=run.iNum, serial=instr.bag.serial, + peer=($buddy.peer), peerID=buddy.peerID.short + + +proc schedPoolWorker*( + run: ReplayRunnerRef; + instr: ReplaySchedPool; + ) = + ## Runs `schedPool()` in the foreground. 
+ ## + const info = instr.replayLabel() + let buddy = run.getOrNewPeerFrame(instr, info).valueOr: return + + if 0 < run.nPeers: + warn info & ": no active peers allowed", n=run.iNum, + serial=instr.bag.serial, peer=buddy.peer, nPeers=run.nPeers, expected=0 + + # The scheduler will reset the `poolMode` flag before starting the + # `schedPool()` function. + run.ctx.poolMode = false + + discard run.backup.schedPool(buddy, instr.bag.last, instr.bag.laps.int) + + # Syncer state was captured when leaving the `schedPool()` handler. + buddy.checkSyncerState(instr, info) + buddy.processFinishedClearFrame(instr, info) + + info info & ": done", n=run.iNum, serial=instr.bag.serial, + peer=($buddy.peer), peerID=buddy.peerID.short + + +proc schedPeerBegin*( + run: ReplayRunnerRef; + instr: ReplaySchedPeerBegin; + ) {.async: (raises: []).} = + ## Run the `schedPeer()` task. + ## + # Synchronise against captured environment and start process + const info = instr.replayLabel() + let buddy = run.getOrNewPeerFrame(instr, info).valueOr: return + discard await buddy.waitForSyncedEnv(instr, info) + asyncSpawn buddy.schedPeerProcessImpl(instr, info) + + +proc schedPeerEnd*( + run: ReplayRunnerRef; + instr: ReplaySchedPeerEnd; + ) {.async: (raises: []).} = + ## Clean up (in foreground) after `schedPeer()` process has terminated. 
+ ## + const info = instr.replayLabel() + let buddy = run.getPeer(instr, info).valueOr: return + buddy.whenProcessFinished(instr, info).isErrOr: + buddy.run.nPeers.dec # peer is not active, anymore + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_sync.nim b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_sync.nim new file mode 100644 index 0000000000..5bf9e709ee --- /dev/null +++ b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_sync.nim @@ -0,0 +1,84 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Replay runner + +{.push raises:[].} + +import + pkg/chronicles, + ../../../../../execution_chain/core/chain, + ../runner_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Public dispatcher handlers +# ------------------------------------------------------------------------------ + +proc syncActvFailedWorker*(run: ReplayRunnerRef; instr: ReplaySyncActvFailed) = + const info = instr.replayLabel() + trace info, n=run.iNum, serial=instr.bag.serial + + +proc syncActivateWorker*(run: ReplayRunnerRef; instr: ReplaySyncActivated) = + const + info = instr.replayLabel() + let + serial = instr.bag.serial + ctx = run.ctx + + if not ctx.hibernate: + warn info & ": already activated", n=run.iNum, serial + return + + var activationOK = true + if ctx.chain.baseNumber != instr.bag.baseNum: + error info & ": cannot activate (bases must match)", n=run.iNum, serial, + base=ctx.chain.baseNumber.bnStr, expected=instr.bag.baseNum.bnStr + activationOK = false + + if activationOK: + ctx.hdrCache.headTargetUpdate(instr.bag.head, instr.bag.finHash) + + # Set the number of active buddies (avoids some moaning.) + run.ctx.pool.nBuddies = instr.bag.nPeers.int + run.checkSyncerState(instr, info) + + if ctx.hibernate or not activationOK: + const failedInfo = info & ": activation failed" + trace failedInfo, n=run.iNum, serial + run.stopError(failedInfo) + else: + # No need for scheduler noise (e.g. disconnect messages.) 
+ ctx.noisyLog = false + debug info, n=run.iNum, serial + + +proc syncSuspendWorker*(run: ReplayRunnerRef; instr: ReplaySyncHibernated) = + const info = instr.replayLabel() + if not run.ctx.hibernate: + run.stopError(info & ": suspend failed") + return + + run.checkSyncerState(instr, info) + debug info, n=run.iNum, serial=instr.bag.serial + + # Shutdown if there are no remaining sessions left + if 1 < run.nSessions: + run.nSessions.dec + else: + run.stopOk(info & ": session(s) terminated") + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_version.nim b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_version.nim new file mode 100644 index 0000000000..73247f1a00 --- /dev/null +++ b/tools/syncer/replay/replay_runner/runner_dispatch/dispatch_version.nim @@ -0,0 +1,74 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Replay runner + +{.push raises:[].} + +import + pkg/chronicles, + ../../../../../execution_chain/core/chain, + ../runner_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Public dispatcher handlers +# ------------------------------------------------------------------------------ + +proc versionInfoWorker*(run: ReplayRunnerRef; instr: ReplayVersionInfo) = + const + info = instr.replayLabel() + let + serial = instr.bag.serial + ctx = run.ctx + var + versionOK = true + + if serial != 1: + error info & ": not the first record", serial, expected=1 + versionOK = false + + if run.iNum != 1: + error info & ": record count mismatch", n=run.iNum, expected=1 + versionOK = false + + if instr.bag.version != TraceVersionID: + error info & ": wrong version", serial, + traceLayoutVersion=instr.bag.version, expected=TraceVersionID + versionOK = false + + if instr.bag.networkId != ctx.chain.com.networkId: + error info & ": wrong network", serial, + networkId=instr.bag.networkId, expected=ctx.chain.com.networkId + versionOK = false + + if ctx.chain.baseNumber < instr.bag.baseNum: + error info & ": cannot start (base too low)", serial, + base=ctx.chain.baseNumber.bnStr, replayBase=instr.bag.baseNum.bnStr + versionOK = false + + if not ctx.hibernate: + error info & ": syncer must not be activated, yet", serial + versionOK = false + + if not versionOK: + run.stopError(info & ": version match failed") + return + + chronicles.info info, n=run.iNum, serial, TraceVersionID, + base=ctx.chain.baseNumber.bnStr, latest=ctx.chain.latestNumber.bnStr + run.checkSyncerState(instr, info) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_runner/runner_init.nim b/tools/syncer/replay/replay_runner/runner_init.nim new file mode 100644 
index 0000000000..932c8c42fe --- /dev/null +++ b/tools/syncer/replay/replay_runner/runner_init.nim @@ -0,0 +1,77 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay runner + +{.push raises:[].} + +import + pkg/chronos, + ../../../../execution_chain/networking/[p2p, p2p_peers, peer_pool], + ../../../../execution_chain/sync/wire_protocol, + ./runner_desc + +logScope: + topics = "replay" + +# ------------------------------------------------------------------------------ +# Private helper(s) +# ------------------------------------------------------------------------------ + +proc getDispatcher(): Dispatcher = + ## Return a list of all known protocols and pretend all are supported + var po = PeerObserver() + po.addProtocol eth68 + po.addProtocol eth69 + + var q: array[MAX_PROTOCOLS,Opt[uint64]] + q[0] = Opt.none(uint64) + q[1] = Opt.some(16'u64) + for n in 2 .. 
po.protocols.len: + q[n] = Opt.some(q[n-1].value + po.protocols[n-1].messages[^1].id) + + Dispatcher(protocolOffsets: q) + + +proc getProtocolStates(): array[MAX_PROTOCOLS,RootRef] = + ## Pretend that all `getDispatcher()` list items are initialised + var q: typeof(result) + q[0] = RootRef(nil) + q[1] = EthPeerState(initialized: true) + q[2] = Eth69PeerState(initialized: true) + q + + +proc init(T: type ReplayEthState): T = + ## For ethxx compatibility + T(capa: getDispatcher(), + prots: getProtocolStates()) + +# ------------------------------------------------------------------------------ +# Public constructor(s) +# ------------------------------------------------------------------------------ + +proc initRunner*(rpl: ReplayRunnerRef) = + ## Initialise dispatcher + const info = "ReplayRunnerRef(): " + if ReplayRunnerID != rpl.ctx.handler.version: + fatal info & "Need original handlers version", + handlerVersion=rpl.ctx.handler.version + quit(QuitFailure) + + rpl.ethState = ReplayEthState.init() + + +proc destroyRunner*(run: ReplayRunnerRef) = + discard + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/replay/replay_setup.nim b/tools/syncer/replay/replay_setup.nim new file mode 100644 index 0000000000..ff13d777e0 --- /dev/null +++ b/tools/syncer/replay/replay_setup.nim @@ -0,0 +1,195 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Replay environment setup & destroy + +{.push raises:[].} + +import + std/[streams, strutils], + pkg/[chronicles, chronos], + ../../../execution_chain/sync/wire_protocol, + ./replay_reader/reader_init, + ./replay_runner/runner_dispatch/[dispatch_blocks, dispatch_headers], + ./replay_runner/[runner_desc, runner_init], + ./replay_runner + +logScope: + topics = "beacon replay" + +const + DontQuit = low(int) + ## To be used with `onException()` + + stopInfo = "replayStop()" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +template onException( + info: static[string]; + quitCode: static[int]; + code: untyped) = + try: + code + except CatchableError as e: + const blurb = info & "Replay stream exception" + when quitCode == DontQuit: + error blurb, error=($e.name), msg=e.msg + else: + fatal blurb & " -- STOP", error=($e.name), msg=e.msg + quit(quitCode) + +# ------------------------------------------------------------------------------ +# Private replacement handlers +# ------------------------------------------------------------------------------ + +proc noOpBuddy(buddy: BeaconBuddyRef) = + discard + +proc noOpSchedStartFalse(buddy: BeaconBuddyRef): bool = + return false + +proc noOpSchedPoolTrue(a: BeaconBuddyRef, b: bool, c: int): bool = + return true + +proc noOpSchedDaemon(ctx: BeaconCtxRef): + Future[Duration] {.async: (raises: []).} = + return replayWaitMuted + +proc noOpSchedPeer(buddy: BeaconBuddyRef): + Future[Duration] {.async: (raises: []).} = + return replayWaitMuted + +# ------------------------------------------------------------------------------ +# Private functions +# ------------------------------------------------------------------------------ + +proc checkStop(rpl: ReplayRunnerRef): ReplayStopIfFn = + proc(): bool = + if rpl.stopRunner: + return true + false + +proc cleanUp(rpl: ReplayRunnerRef): ReplayEndUpFn = 
+ proc() = + rpl.stopSync(rpl) + if rpl.stopQuit: + notice stopInfo & ": quitting .." + quit(QuitSuccess) + else: + info stopInfo & ": terminating .." + +# -------------- + +proc replayStartCB(rpl: ReplayRunnerRef) = + ## Start replay emulator + ## + # Set up redirect handlers for replay + rpl.version = ReplayRunnerID + # activate # use as is + # suspend # use as is + rpl.reader = ReplayReaderRef.init(rpl.captStrm) + rpl.schedDaemon = noOpSchedDaemon + rpl.schedStart = noOpSchedStartFalse # `false` => don't register + rpl.schedStop = noOpBuddy + rpl.schedPool = noOpSchedPoolTrue # `true` => stop repeating + rpl.schedPeer = noOpSchedPeer + rpl.getBlockHeaders = fetchHeadersHandler # from dispatcher + rpl.syncBlockHeaders = noOpBuddy + rpl.getBlockBodies = fetchBodiesHandler # from dispatcher + rpl.syncBlockBodies = noOpBuddy + rpl.importBlock = importBlockHandler # from dispatcher + rpl.syncImportBlock = noOpBuddy + + rpl.initRunner() + + rpl.startSync = proc(self: BeaconHandlersSyncRef) = + discard + + rpl.stopSync = proc(self: BeaconHandlersSyncRef) = + ReplayRunnerRef(self).reader.destroy() + ReplayRunnerRef(self).destroyRunner() + stopInfo.onException(DontQuit): + ReplayRunnerRef(self).captStrm.close() + ReplayRunnerRef(self).ctx.pool.handlers = ReplayRunnerRef(self).backup + + # Start fake scheduler + asyncSpawn rpl.runDispatcher( + rpl.reader, stopIf=rpl.checkStop, endUp=rpl.cleanUp) + +# ------------------------------------------------------------------------------ +# Public constructor/destructor +# ------------------------------------------------------------------------------ + +proc replaySetup*( + ctx: BeaconCtxRef; + fileName: string; + noStopQuit: bool; + fakeImport: bool; + ): Result[void,string] = + ## setup replay emulator + ## + const info = "replaySetup(): " + + if ctx.handler.version != 0: + return err("Overlay session handlers activated already" & + "ID=" & $ctx.handler.version) + + let strm = fileName.newFileStream fmRead + if strm.isNil: + 
return err("Cannot open trace file for reading" & + ", fileName=\"" & fileName & "\"") + + let rpl = ReplayRunnerRef( + ctx: ctx, + captStrm: strm, + fakeImport: fakeImport, + stopQuit: not noStopQuit, + backup: ctx.pool.handlers, + + # This is still the old descriptor which will be updated when + # `startSync()` is run. + version: ReplayRunnerID, + activate: ctx.handler.activate, + suspend: ctx.handler.suspend, + schedDaemon: ctx.handler.schedDaemon, + schedStart: ctx.handler.schedStart, + schedStop: ctx.handler.schedStop, + schedPool: ctx.handler.schedPool, + schedPeer: ctx.handler.schedPeer, + getBlockHeaders: ctx.handler.getBlockHeaders, + syncBlockHeaders: ctx.handler.syncBlockHeaders, + getBlockBodies: ctx.handler.getBlockBodies, + syncBlockBodies: ctx.handler.syncBlockBodies, + importBlock: ctx.handler.importBlock, + syncImportBlock: ctx.handler.syncImportBlock) + + rpl.startSync = proc(self: BeaconHandlersSyncRef) = + ReplayRunnerRef(self).replayStartCB() + + rpl.stopSync = proc(self: BeaconHandlersSyncRef) = + info.onException(DontQuit): + ReplayRunnerRef(self).captStrm.close() + ReplayRunnerRef(self).ctx.pool.handlers = ReplayRunnerRef(self).backup + + ctx.pool.handlers = rpl + ok() + + +proc replayRelease*(ctx: BeaconCtxRef) = + ## Stop replay and restore descriptors + if ctx.pool.handlers.version in {ReplaySetupID, ReplayRunnerID}: + ReplayRunnerRef(ctx.pool.handlers).stopSync(nil) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/syncer_test_client_inspect.nim b/tools/syncer/syncer_test_client_inspect.nim new file mode 100644 index 0000000000..857f95ee3a --- /dev/null +++ b/tools/syncer/syncer_test_client_inspect.nim @@ -0,0 +1,50 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +# * MIT 
license ([LICENSE-MIT](LICENSE-MIT)) +# at your option. +# This file may not be copied, modified, or distributed except according to +# those terms. + +import + std/[cmdline, os, streams, strutils, terminal], + pkg/[chronicles, confutils], + pkg/beacon_chain/process_state, + ./replay/replay_reader + +const + fgSection = fgYellow + +type + ToolConfig* = object of RootObj + captureFile {. + separator: "INSPECT TOOL OPTIONS:" + desc: "Read from argument and print its contents" + name: "capture-file" .}: InputFile + +# ------------------------------------------------------------------------------ +# Main +# ------------------------------------------------------------------------------ + +let + config = ToolConfig.load( + cmdLine = commandLineParams(), + copyrightBanner = ansiForegroundColorCode(fgSection) & + "\pNimbus capture file inspection tool.\p") + name = config.captureFile.string + +if not name.fileExists: + fatal "No such capture file", name + quit(QuitFailure) + +ProcessState.setupStopHandlers() +ProcessState.notifyRunning() + +let reader = ReplayReaderRef.init(name.newFileStream fmRead) +reader.captureLog(stop = proc: bool = + ProcessState.stopIt(notice("Terminating", reason = it))) + +quit(QuitSuccess) + +# End diff --git a/tools/syncer/syncer_test_client_inspect.nim.cfg b/tools/syncer/syncer_test_client_inspect.nim.cfg new file mode 100644 index 0000000000..cb44702917 --- /dev/null +++ b/tools/syncer/syncer_test_client_inspect.nim.cfg @@ -0,0 +1,5 @@ +-d:"chronicles_sinks=textlines[stderr]" +-d:"chronicles_runtime_filtering=on" +-d:"chronicles_line_numbers=0" +-d:"chronicles_thread_ids=no" +-d:"chronicles_log_level=TRACE" diff --git a/tools/syncer/syncer_test_client_replay.nim b/tools/syncer/syncer_test_client_replay.nim new file mode 100644 index 0000000000..5fef45288c --- /dev/null +++ b/tools/syncer/syncer_test_client_replay.nim @@ -0,0 +1,107 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed under either of +# * Apache 
License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +# * MIT license ([LICENSE-MIT](LICENSE-MIT)) +# at your option. +# This file may not be copied, modified, or distributed except according to +# those terms. + +import + std/[cmdline, os, strutils, terminal], + pkg/[chronicles, results], + ../../execution_chain/[config, nimbus_desc, nimbus_execution_client], + ../../execution_chain/sync/beacon, + ./helpers/sync_ticker, + ./replay/replay_setup + +const + fgSection = fgYellow + fgOption = fgBlue + +type + ToolConfig* = object of RootObj + captureFile {. + separator: "REPLAY TOOL OPTIONS:" + desc: "Read from trace capture file and replay the " & + "syncer session" + name: "capture-file" .}: InputFile + + noStopQuit {. + desc: "Continue as normal after the captured replay states are " & + "exhausted. If the option is missing, the program will terminate" + defaultValue: false + name: "no-stop-quit" .}: bool + + fakeImport {. + desc: "The tool will not import blocks while replaying" + defaultValue: false + name: "fake-import" .}: bool + + noSyncTicker {. 
+ desc: "Disable logging sync status regularly" + defaultValue: false + name: "disable-sync-ticker" .}: bool + + SplitCmdLine = tuple + leftArgs: seq[string] # split command line: left to "--" marker (nimbus) + rightArgs: seq[string] # split command line: right to "--" marker (tool) + +# ------------------------------------------------------------------------------ +# Private helpers, command line parsing tools +# ------------------------------------------------------------------------------ + +proc splitCmdLine(): SplitCmdLine = + ## Split commans line options + ## :: + ## [ --] [ --] []") + + leftConf = makeConfig(cmdLine = leftOpts) + + # Update node config for lazy beacon sync update + nodeConf = NimbusNode( + beaconSyncRef: BeaconSyncRef.init rightConf.beaconSyncConfig) + +# Run execution client +leftConf.main(nodeConf) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/syncer_test_client_replay.nim.cfg b/tools/syncer/syncer_test_client_replay.nim.cfg new file mode 100644 index 0000000000..580b97645a --- /dev/null +++ b/tools/syncer/syncer_test_client_replay.nim.cfg @@ -0,0 +1,5 @@ +-d:"chronicles_sinks=textlines[stderr]" +-d:"chronicles_runtime_filtering=on" +-d:"chronicles_line_numbers=1" +-d:"chronicles_thread_ids=no" +-d:"chronicles_log_level=TRACE" diff --git a/tools/syncer/syncer_test_client_trace.nim b/tools/syncer/syncer_test_client_trace.nim new file mode 100644 index 0000000000..7fc5d675e9 --- /dev/null +++ b/tools/syncer/syncer_test_client_trace.nim @@ -0,0 +1,112 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +# * MIT license ([LICENSE-MIT](LICENSE-MIT)) +# at your option. +# This file may not be copied, modified, or distributed except according to +# those terms. 
+ +import + std/[cmdline, os, strutils, terminal], + pkg/[chronicles, confutils, results], + ../../execution_chain/[config, nimbus_desc, nimbus_execution_client], + ../../execution_chain/sync/beacon, + ./helpers/sync_ticker, + ./trace/trace_setup + +const + fgSection = fgYellow + fgOption = fgBlue + +type + ToolConfig* = object of RootObj + captureFile {. + separator: "TRACE TOOL OPTIONS:" + desc: "Store captured states in the argument. If this " & + "option is missing, no capture file is written" + name: "capture-file" .}: Option[OutFile] + + nSessions {. + desc: "Run a trace for this many sessions (i.e. from activation to " & + "suspension)" + defaultValue: 1 + name: "num-trace-sessions" .}: uint + + nPeersMin {. + desc: "Minimal number of peers needed for activating the first syncer " & + "session" + defaultValue: 0 + name: "num-peers-min" .}: uint + + noSyncTicker {. + desc: "Disable logging sync status regularly" + defaultValue: false + name: "disable-sync-ticker" .}: bool + + SplitCmdLine = tuple + leftArgs: seq[string] # split command line: left to "--" marker (nimbus) + rightArgs: seq[string] # split command line: right to "--" marker (tool) + +# ------------------------------------------------------------------------------ +# Private functions +# ------------------------------------------------------------------------------ + +proc splitCmdLine(): SplitCmdLine = + ## Split commans line options + ## :: + ## [ --] [ --] []") + + leftConf = makeConfig(cmdLine = leftOpts) + + # Update node config for lazy beacon sync update + nodeConf = NimbusNode( + beaconSyncRef: BeaconSyncRef.init rightConf.beaconSyncConfig) + +# Run execution client +leftConf.main(nodeConf) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/syncer_test_client_trace.nim.cfg b/tools/syncer/syncer_test_client_trace.nim.cfg new file mode 100644 
index 0000000000..cb44702917 --- /dev/null +++ b/tools/syncer/syncer_test_client_trace.nim.cfg @@ -0,0 +1,5 @@ +-d:"chronicles_sinks=textlines[stderr]" +-d:"chronicles_runtime_filtering=on" +-d:"chronicles_line_numbers=0" +-d:"chronicles_thread_ids=no" +-d:"chronicles_log_level=TRACE" diff --git a/tools/syncer/trace/README.md b/tools/syncer/trace/README.md new file mode 100644 index 0000000000..4a4b7e2e65 --- /dev/null +++ b/tools/syncer/trace/README.md @@ -0,0 +1,34 @@ +Beacon sync tracer +================== + +For the execution layer binary, data from syncer sessions can be captured +into a file **(capture)** along with system state information via + + ./build/syncer_test_client_trace ... -- --capture-file=(capture) + +where **...** stands for all other options that might be useful for running +an execution layer session. + +The capture file **(capture)** will hold enough data for replaying the +execution layer session(s). + +With the command line option *\-\-capture-file=***(capture)** +for the *syncer_test_client_trace* binary, data from the syncer sessions +will be written to the argument file named **(capture)** along with system +state information. The file **(capture)** will hold enough data for +replaying the session(s) with the *syncer_test_client_replay* binary. + +Both binaries *syncer_test_client_trace* and *syncer_test_client_replay* are +extensions of the standard *nimbus_execution_client* binary. + +By default, the captured syncer session starts with the first syncer activation +(when *"Activating syncer"* is logged) and ends when the syncer is suspended +(when *"Suspending syncer"* is logged.) + +The trace file **(capture)** is organised as an ASCII text file, each line +consists of a *JSON* encoded data capture record. + +By nature of the *JSON* representation, the size of any capture data file +will be huge. Compressing with *gzip* when finished, the capture file size +can be reduced to less than 20%. 
The *gzipped* format will also be accepted +by the replay tools. diff --git a/tools/syncer/trace/trace_desc.nim b/tools/syncer/trace/trace_desc.nim new file mode 100644 index 0000000000..9d3cd0b4d2 --- /dev/null +++ b/tools/syncer/trace/trace_desc.nim @@ -0,0 +1,266 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Trace environment descriptor and helpers +## +## TODO: +## * n/a +## + +{.push raises:[].} + +import + std/[net, streams], + pkg/[chronos, eth/common], + ../../../execution_chain/sync/wire_protocol, + ../../../execution_chain/sync/beacon/beacon_desc, + ../../../execution_chain/sync/beacon/worker/worker_desc + +export + beacon_desc, + worker_desc + +const + TraceVersionID* = 20250917 + + TraceSetupID* = 1 ## Phase 1 layout ID, prepare + TraceRunnerID* = 10 ## Phase 2 layout ID, full execution + +type + StopIfEosHdl* = proc(trc: TraceRef) {.gcsafe, raises: [].} + ## Terminate trace if the number of sessions is exhausted + + TraceRef* = ref object of BeaconHandlersSyncRef + ## Overlay handlers extended by descriptor data + ctx*: BeaconCtxRef ## Parent context + outStream*: Stream ## Output file with capture records + backup*: BeaconHandlersRef ## Can restore previous handlers + started*: Moment ## Start time + sessions*: int ## Initial number of sessions + remaining*: int ## Number of sessions left to run + stopIfEos*: StopIfEosHdl ## Auto-disable trace when needed + serial: uint ## Unique record ID + + # ------------- + + TraceRecType* = enum + RecBase = 0 + VersionInfo = 1 + + SyncActvFailed + SyncActivated + SyncHibernated + + SchedDaemonBegin + SchedDaemonEnd + 
SchedStart + SchedStop + SchedPool + SchedPeerBegin + SchedPeerEnd + + FetchHeaders + SyncHeaders + + FetchBodies + SyncBodies + + ImportBlock + SyncBlock + + TraceHdrUnproc* = object + ## Optional sub-object for `TraceRecBase` + hLen*: uint64 ## # unprocessed header entries + hChunks*: uint ## # unprocessed header iv segments + hLast*: BlockNumber ## last avail block number + hLastLen*: uint64 ## size of last block number interval + + TraceBlkUnproc* = object + ## Optional sub-object for `TraceRecBase` + bLen*: uint64 ## # unprocessed block entries + bChunks*: uint ## # unprocessed block iv segments + bLeast*: BlockNumber ## least avail block number + bLeastLen*: uint64 ## size of first interval + + TracePeerCtx* = object + ## Optional sub-object for `TraceRecBase` + peerCtrl*: BuddyRunState ## Sync peer run state + peerID*: Hash ## Sync peer ID (if any) + nHdrErrors*: uint8 ## Header tranfer errors + nBlkErrors*: uint8 ## Body tranfer errors + + TraceRecBase* = object of RootObj + ## Trace context applicable with and without known peer + time*: Duration ## Relative to `TraceRef.started` + serial*: uint ## Capture record ID + frameID*: Opt[uint] ## Begin/end frame for scheduler tasks + + nPeers*: uint ## Number of sync peers (buddies) + syncState*: SyncState ## Headers/bodies preocessing state + chainMode*: HeaderChainMode ## Headers cache/DB state + poolMode*: bool ## Mostly implied by `syncState` + baseNum*: BlockNumber ## Max finalised number from `FC` module + latestNum*: BlockNumber ## Number of latest branch head + antecedent*: BlockNumber ## Lower end of header chain cache + + hdrUnpr*: Opt[TraceHdrUnproc] ## Optional unprocessed headers state + blkUnpr*: Opt[TraceBlkUnproc] ## Optional unprocessed blocks state + peerCtx*: Opt[TracePeerCtx] ## Sync peer specific ctx + slowPeer*: Opt[Hash] ## Registered slow peer + + TraceVersionInfo* = object of TraceRecBase + version*: uint + networkId*: NetworkId + + # ------------- + + TraceSyncActvFailed* = object of 
TraceRecBase + + TraceSyncActivated* = object of TraceRecBase + head*: Header ## Part of environment + finHash*: Hash32 ## Part of environment + + TraceSyncHibernated* = object of TraceRecBase + + # ------------- + + TraceSchedDaemonBegin* = object of TraceRecBase + ## Environment is captured before the daemon handler body is executed. + + TraceSchedDaemonEnd* = object of TraceRecBase + ## Environment is captured when leaving the daemon handler. + idleTime*: Duration ## Suggested idle time + + TraceSchedStart* = object of TraceRecBase + ## Environment is captured when leaving sched the start handler. + peerIP*: IpAddress ## Descriptor argument + peerPort*: Port ## Descriptor argument + accept*: bool ## Result/return code + + TraceSchedStop* = object of TraceRecBase + ## Environment is captured when leaving the sched stop handler. + peerIP*: IpAddress ## Descriptor argument + peerPort*: Port ## Descriptor argument + + TraceSchedPool* = object of TraceRecBase + ## Environment is captured leaving the pool handler. + peerIP*: IpAddress ## Descriptor argument + peerPort*: Port ## Descriptor argument + last*: bool ## Request argument + laps*: uint ## Request argument + stop*: bool ## Result/return code + + TraceSchedPeerBegin* = object of TraceRecBase + ## Environment is captured before the peer handler body is executed. + peerIP*: IpAddress ## Descriptor argument + peerPort*: Port ## Descriptor argument + + TraceSchedPeerEnd* = object of TraceRecBase + ## Environment is captured when leaving peer handler. + idleTime*: Duration ## Suggested idle time + + # ------------- + + TraceFetchHeaders* = object of TraceRecBase + ## Environment is captured after the `getBlockHeaders()` handler is run. 
+ req*: BlockHeadersRequest ## Fetch request + ivReq*: BnRange ## Request as interval of block numbers + fetched*: Opt[FetchHeadersData] ## If dowloaded successfully + error*: Opt[BeaconError] + + TraceSyncHeaders* = object of TraceRecBase + ## Environment is captured when the `syncBlockHeaders()` handler is run. + + + TraceFetchBodies* = object of TraceRecBase + ## Environment is captured after the `getBlockBodies()` handler is run. + req*: BlockBodiesRequest ## Fetch request + ivReq*: BnRange ## Request as interval of block numbers + fetched*: Opt[FetchBodiesData] ## If dowloaded successfully + error*: Opt[BeaconError] + + TraceSyncBodies* = object of TraceRecBase + ## Environment is captured when the `syncBlockBodies()` handler is run. + + + TraceImportBlock* = object of TraceRecBase + ## Environment is captured after the `importBlock()` handler is run. + ethBlock*: EthBlock ## Request argument + effPeerID*: Hash ## Request argument + elapsed*: Opt[Duration] ## Processing time on success + error*: Opt[BeaconError] + + TraceSyncBlock* = object of TraceRecBase + ## Environment is captured after the `syncImportBlock()` handler is run. 
+ + # ------------- + + JTraceRecord*[T] = object + ## Json writer record format + kind*: TraceRecType + bag*: T + +# ------------------------------------------------------------------------------ +# Public helpers +# ------------------------------------------------------------------------------ + +func trace*(ctx: BeaconCtxRef): TraceRef = + ## Getter, get trace descriptor (if any) + if ctx.handler.version == TraceRunnerID: + return ctx.handler.TraceRef + +func newSerial*(trc: TraceRef): uint64 = + trc.serial.inc + if trc.serial == 0: + trc.serial.inc + trc.serial + +func toTraceRecType*(T: type): TraceRecType = + ## Derive capture type from record layout + when T is TraceVersionInfo: + VersionInfo + elif T is TraceSyncActvFailed: + SyncActvFailed + elif T is TraceSyncActivated: + SyncActivated + elif T is TraceSyncHibernated: + SyncHibernated + elif T is TraceSchedDaemonBegin: + SchedDaemonBegin + elif T is TraceSchedDaemonEnd: + SchedDaemonEnd + elif T is TraceSchedStart: + SchedStart + elif T is TraceSchedStop: + SchedStop + elif T is TraceSchedPool: + SchedPool + elif T is TraceSchedPeerBegin: + SchedPeerBegin + elif T is TraceSchedPeerEnd: + SchedPeerEnd + elif T is TraceFetchHeaders: + FetchHeaders + elif T is TraceSyncHeaders: + SyncHeaders + elif T is TraceFetchBodies: + FetchBodies + elif T is TraceSyncBodies: + SyncBodies + elif T is TraceImportBlock: + ImportBlock + elif T is TraceSyncBlock: + SyncBlock + else: + {.error: "Unsupported trace capture record type".} + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/trace/trace_setup.nim b/tools/syncer/trace/trace_setup.nim new file mode 100644 index 0000000000..817bfcecde --- /dev/null +++ b/tools/syncer/trace/trace_setup.nim @@ -0,0 +1,179 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * 
MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Trace environment setup & destroy + +{.push raises:[].} + +import + std/[os, streams, syncio], + pkg/[chronicles, chronos], + ./trace_setup/[ + setup_blocks, setup_headers, setup_helpers, setup_sched, setup_sync, + setup_write], + ./trace_desc + +logScope: + topics = "beacon trace" + +const + DontQuit = low(int) + ## To be used with `onCloseException()` + + stopInfo = "traceStop(): " + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +template onException( + info: static[string]; + quitCode: static[int]; + code: untyped) = + try: + code + except CatchableError as e: + const blurb = info & "Trace stream exception" + when quitCode == DontQuit: + error blurb, error=($e.name), msg=e.msg + else: + fatal blurb & " -- STOP", error=($e.name), msg=e.msg + quit(quitCode) + +# ----------- + +proc stopIfEos(trc: TraceRef) = + trc.remaining.dec + if trc.remaining <= 0: + info stopInfo & "Number of sessions exhausted", nSessions=trc.sessions + trc.stopSync(trc) + +proc writeVersion(ctx: BeaconCtxRef) = + var tRec: TraceVersionInfo + tRec.init ctx + tRec.version = TraceVersionID + tRec.networkId = ctx.chain.com.networkId + ctx.traceWrite tRec + trace "=Version", TraceVersionID, serial=tRec.serial + +# ----------- + +proc traceStartCB(trc: TraceRef) = + ## Start trace session handler + ## + trc.started = Moment.now() + trc.stopIfEos = stopIfEos + + # Set up redirect handlers for trace/capture + trc.version = TraceRunnerID + trc.activate = activateTrace + trc.suspend = suspendTrace + trc.schedDaemon = schedDaemonTrace + 
trc.schedStart = schedStartTrace + trc.schedStop = schedStopTrace + trc.schedPool = schedPoolTrace + trc.schedPeer = schedPeerTrace + trc.getBlockHeaders = fetchHeadersTrace + trc.syncBlockHeaders = syncHeadersTrace + trc.getBlockBodies = fetchBodiesTrace + trc.syncBlockBodies = syncBodiesTrace + trc.importBlock = importBlockTrace + trc.syncImportBlock = syncBlockTrace + + trc.startSync = proc(self: BeaconHandlersSyncRef) = + discard + + trc.stopSync = proc(self: BeaconHandlersSyncRef) = + stopInfo.onException(DontQuit): + TraceRef(self).outStream.flush() + TraceRef(self).outStream.close() + TraceRef(self).ctx.pool.handlers = TraceRef(self).backup + + # Write version as first record + trc.ctx.writeVersion() + +# ------------------------------------------------------------------------------ +# Public constructor/destructor +# ------------------------------------------------------------------------------ + +proc traceSetup*( + ctx: BeaconCtxRef; + fileName: string; + nSessions: int; + ): Result[void,string] = + ## Install trace handlers + const info = "traceSetup(): " + + if ctx.handler.version != 0: + return err("Overlay session handlers activated already" & + ", ID=" & $ctx.handler.version) + + if fileName.fileExists: # File must not exist yet + return err("Unsafe, please delete file first" & + ", fileName=\"" & fileName & "\"") + + var strm = Stream(nil) + info.onException(DontQuit): + # Note that there is a race condition. The proper open mode shoud be + # `fmReadWriteExisting` (sort of resembling `O_CREATE|O_EXCL`) but it + # does not work with the current nim version `2.2.4`. 
+ var fd: File + if fd.open(fileName, fmWrite): + strm = fd.newFileStream() + + if strm.isNil: + return err("Cannot open trace file for writing" & + ", fileName=\"" & fileName & "\"") + + let trc = TraceRef( + # Install new extended handler descriptor + ctx: ctx, + outStream: strm, + backup: ctx.pool.handlers, + sessions: nSessions, + remaining: nSessions, + + # This is still the old descriptor which will be updated when + # `startSync()` is run. + version: TraceSetupID, + activate: ctx.handler.activate, + suspend: ctx.handler.suspend, + schedDaemon: ctx.handler.schedDaemon, + schedStart: ctx.handler.schedStart, + schedStop: ctx.handler.schedStop, + schedPool: ctx.handler.schedPool, + schedPeer: ctx.handler.schedPeer, + getBlockHeaders: ctx.handler.getBlockHeaders, + syncBlockHeaders: ctx.handler.syncBlockHeaders, + getBlockBodies: ctx.handler.getBlockBodies, + syncBlockBodies: ctx.handler.syncBlockBodies, + importBlock: ctx.handler.importBlock, + syncImportBlock: ctx.handler.syncImportBlock) + + trc.startSync = proc(self: BeaconHandlersSyncRef) = + TraceRef(self).traceStartCB() + + trc.stopSync = proc(self: BeaconHandlersSyncRef) = + info.onException(DontQuit): + TraceRef(self).outStream.close() + TraceRef(self).ctx.pool.handlers = TraceRef(self).backup + + ctx.pool.handlers = trc + ok() + + +proc traceRelease*(ctx: BeaconCtxRef) = + ## Stop tracer and restore descriptors + if ctx.pool.handlers.version in {TraceSetupID, TraceRunnerID}: + TraceRef(ctx.pool.handlers).stopSync(nil) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/trace/trace_setup/setup_blocks.nim b/tools/syncer/trace/trace_setup/setup_blocks.nim new file mode 100644 index 0000000000..50a21afc62 --- /dev/null +++ b/tools/syncer/trace/trace_setup/setup_blocks.nim @@ -0,0 +1,126 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# 
Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Overlay handler for trace environment + +{.push raises:[].} + +import + pkg/[chronicles, chronos, stew/interval_set], + ../../../../execution_chain/networking/p2p, + ../../../../execution_chain/sync/wire_protocol/types, + ../trace_desc, + ./[setup_helpers, setup_write] + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc toBnRange( + ctx: BeaconCtxRef; + lst: openArray[Hash32]; + info: static[string]; + ): BnRange = + ## Resolve block hashes as interval of block numbers + let rs = BnRangeSet.init() + for w in lst: + let h = ctx.hdrCache.get(w).valueOr: + raiseAssert info & ": Cannot resolve" & + ", hash=" & w.short + if rs.merge(h.number,h.number) != 1: + raiseAssert info & ": dulplicate hash" & + ", hash=" & w.short & ", number=" & h.bnStr + rs.ge().expect "valid BnRange" + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc fetchBodiesTrace*( + buddy: BeaconBuddyRef; + req: BlockBodiesRequest; + ): Future[Result[FetchBodiesData,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `getBlockBodies()` handler which in addition writes data + ## to the output stream for tracing. 
+ ## + let + ivReq = buddy.ctx.toBnRange(req.blockHashes, "fetchBodiesTrace") + data = await buddy.ctx.trace.backup.getBlockBodies(buddy, req) + + var tRec: TraceFetchBodies + tRec.init buddy + tRec.req = req + tRec.ivReq = ivReq + if data.isOk: + tRec.fetched = Opt.some(data.value) + else: + tRec.error = Opt.some(data.error) + buddy.traceWrite tRec + + trace "=BodiesFetch", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial, ivReq=ivReq.bnStr + return data + +proc syncBodiesTrace*( + buddy: BeaconBuddyRef; + ) = + ## Replacement for `syncBlockBodies()` handler. + var tRec: TraceSyncBodies + tRec.init buddy + buddy.traceWrite tRec + + trace "=BodiesSync", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial + + +proc importBlockTrace*( + buddy: BeaconBuddyRef; + ethBlock: EthBlock; + effPeerID: Hash; + ): Future[Result[Duration,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `importBlock()` handler which in addition writes data to + ## the output stream for tracing. + ## + let data = await buddy.ctx.trace.backup.importBlock( + buddy, ethBlock, effPeerID) + + var tRec: TraceImportBlock + tRec.init buddy + tRec.ethBlock = ethBlock + tRec.effPeerID = effPeerID + if data.isOk: + tRec.elapsed = Opt.some(data.value) + else: + tRec.error = Opt.some(data.error) + buddy.traceWrite tRec + + trace "=BlockImport", peer=($buddy.peer), peerID=buddy.peerID.short, + effPeerID=effPeerID.short, serial=tRec.serial + return data + +proc syncBlockTrace*( + buddy: BeaconBuddyRef; + ) = + ## Replacement for `syncImportBlock()` handler. 
+ var tRec: TraceSyncBlock + tRec.init buddy + buddy.traceWrite tRec + + trace "=BlockSync", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/trace/trace_setup/setup_headers.nim b/tools/syncer/trace/trace_setup/setup_headers.nim new file mode 100644 index 0000000000..2ce41a9ba1 --- /dev/null +++ b/tools/syncer/trace/trace_setup/setup_headers.nim @@ -0,0 +1,69 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Overlay handler for trace environment + +{.push raises:[].} + +import + pkg/[chronicles, chronos, stew/interval_set], + ../../../../execution_chain/networking/p2p, + ../../../../execution_chain/sync/wire_protocol/types, + ../trace_desc, + ./[setup_helpers, setup_write] + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc fetchHeadersTrace*( + buddy: BeaconBuddyRef; + req: BlockHeadersRequest; + ): Future[Result[FetchHeadersData,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `getBlockHeaders()` handler which in addition writes data + ## to the output stream for tracing. 
+ ## + let data = await buddy.ctx.trace.backup.getBlockHeaders(buddy, req) + + if not buddy.ctx.hibernate: + var tRec: TraceFetchHeaders + tRec.init buddy + tRec.req = req + if data.isOk: + tRec.fetched = Opt.some(data.value) + else: + tRec.error = Opt.some(data.error) + buddy.traceWrite tRec + + trace "=HeadersFetch", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial + + return data + +proc syncHeadersTrace*( + buddy: BeaconBuddyRef; + ) = + ## Replacement for `syncBlockHeaders()` handler, + ## + if not buddy.ctx.hibernate: + var tRec: TraceSyncHeaders + tRec.init buddy + buddy.traceWrite tRec + + trace "=HeadersSync", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/trace/trace_setup/setup_helpers.nim b/tools/syncer/trace/trace_setup/setup_helpers.nim new file mode 100644 index 0000000000..75795784e6 --- /dev/null +++ b/tools/syncer/trace/trace_setup/setup_helpers.nim @@ -0,0 +1,87 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +{.push raises:[].} + +import + std/[strformat, strutils], + pkg/[chronos, results, stew/interval_set], + ../../../../execution_chain/sync/beacon/worker/helpers as worker_helpers, + ../trace_desc + +export + worker_helpers + +# ------------------------------------------------------------------------------ +# Public initialisers +# ------------------------------------------------------------------------------ + +proc init*(tb: var TraceRecBase; ctx: BeaconCtxRef) = + ## Initialise new trace descriptor. This fuction does nothing if + ## there is no active trace. + let trc = ctx.trace + if not trc.isNil: + tb.serial = trc.newSerial + tb.time = Moment.now() - trc.started + tb.syncState = ctx.pool.lastState + tb.nPeers = ctx.pool.nBuddies.uint + tb.chainMode = ctx.hdrCache.state + tb.poolMode = ctx.poolMode + tb.baseNum = ctx.chain.baseNumber + tb.latestNum = ctx.chain.latestNumber + tb.antecedent = ctx.hdrCache.antecedent.number + + let hChunks = ctx.hdr.unprocessed.chunks().uint + if 0 < hChunks: + let iv = ctx.hdr.unprocessed.le().expect "valid iv" + tb.hdrUnpr = Opt.some(TraceHdrUnproc( + hChunks: hChunks, + hLen: ctx.hdr.unprocessed.total(), + hLast: iv.maxPt, + hLastLen: iv.len)) + + let bChunks = ctx.blk.unprocessed.chunks().uint + if 0 < bChunks: + let iv = ctx.blk.unprocessed.ge().expect "valid iv" + tb.blkUnpr = Opt.some(TraceBlkUnproc( + bChunks: bChunks, + bLen: ctx.blk.unprocessed.total(), + bLeast: iv.minPt, + bLeastLen: iv.len)) + + tb.slowPeer = ctx.pool.lastSlowPeer + + +proc init*(tb: var TraceRecBase; buddy: BeaconBuddyRef) = + ## Variant of `init()` for `buddy` rather than `ctx` + let + ctx = buddy.ctx + trc = ctx.trace + if not trc.isNil: + tb.init ctx + tb.peerCtx = Opt.some(TracePeerCtx( + peerCtrl: buddy.ctrl.state, + peerID: buddy.peerID, + nHdrErrors: buddy.only.nRespErrors.hdr, + nBlkErrors: buddy.only.nRespErrors.blk)) + +# ------------------------------------------------------------------------------ +# Public helpers +# 
------------------------------------------------------------------------------ + +func short*(w: Hash): string = + w.toHex(8).toLowerAscii # strips leading 8 bytes + +func idStr*(w: uint64): string = + &"{w:x}" + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/trace/trace_setup/setup_sched.nim b/tools/syncer/trace/trace_setup/setup_sched.nim new file mode 100644 index 0000000000..d2559c0539 --- /dev/null +++ b/tools/syncer/trace/trace_setup/setup_sched.nim @@ -0,0 +1,178 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Overlay handler for trace environment + +{.push raises:[].} + +import + pkg/[chronicles, chronos], + ../../../../execution_chain/networking/p2p, + ../trace_desc, + ./[setup_helpers, setup_write] + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc getIP(buddy: BeaconBuddyRef): IpAddress = + buddy.peer.remote.node.address.ip + +proc getPort(buddy: BeaconBuddyRef): Port = + let peer = buddy.peer + if peer.remote.node.address.tcpPort != Port(0): + peer.remote.node.address.tcpPort + else: + peer.remote.node.address.udpPort + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc schedDaemonTrace*( + ctx: BeaconCtxRef; + ): Future[Duration] + {.async: (raises: []).} = + ## Replacement for `schedDaemon()` handler which in addition + ## write data to the output stream for tracing. 
+ ## + var tBeg: TraceSchedDaemonBegin + tBeg.init ctx + tBeg.frameID = Opt.some(tBeg.serial) + ctx.traceWrite tBeg + + trace "+Daemon", serial=tBeg.serial, frameID=tBeg.frameID.value.idStr, + syncState=tBeg.syncState + + let idleTime = await ctx.trace.backup.schedDaemon ctx + + var tEnd: TraceSchedDaemonEnd + tEnd.init ctx + tEnd.frameID = Opt.some(tBeg.serial) # refers back to `tBeg` capture + tEnd.idleTime = idleTime + ctx.traceWrite tEnd + + if 0 < tEnd.serial: + trace "-Daemon", serial=tEnd.serial, frameID=tEnd.frameID.value.idStr, + syncState=tBeg.syncState, idleTime=idleTime.toStr + else: + trace "-Daemon (blind)", serial="n/a", frameID=tEnd.frameID.value.idStr, + syncState=tBeg.syncState, idleTime=idleTime.toStr + + return idleTime + + +proc schedStartTrace*(buddy: BeaconBuddyRef): bool = + ## Similar to `schedDaemonTrace()` + ## + let + ctx = buddy.ctx + acceptOk = ctx.trace.backup.schedStart(buddy) + + if not ctx.hibernate: + var tRec: TraceSchedStart + tRec.init buddy + tRec.frameID = Opt.some(tRec.serial) + tRec.peerIP = buddy.getIP() + tRec.peerPort = buddy.getPort() + tRec.accept = acceptOk + buddy.traceWrite tRec + + trace "=StartPeer", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial, frameID=tRec.frameID.value.idStr, + syncState=tRec.syncState + + acceptOk + + +proc schedStopTrace*(buddy: BeaconBuddyRef) = + ## Similar to `schedDaemonTrace()` + ## + let ctx = buddy.ctx + + ctx.trace.backup.schedStop(buddy) + + if not ctx.hibernate: + var tRec: TraceSchedStop + tRec.init buddy + tRec.frameID = Opt.some(tRec.serial) + tRec.peerIP = buddy.getIP() + tRec.peerPort = buddy.getPort() + buddy.traceWrite tRec + + trace "=StopPeer", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial, frameID=tRec.frameID.value.idStr, + syncState=tRec.syncState + + +proc schedPoolTrace*(buddy: BeaconBuddyRef; last: bool; laps: int): bool = + ## Similar to `schedDaemonTrace()` + ## + let stopOk = buddy.ctx.trace.backup.schedPool(buddy, last, 
laps) + + var tRec: TraceSchedPool + tRec.init buddy + tRec.frameID = Opt.some(tRec.serial) + tRec.peerIP = buddy.getIP() + tRec.peerPort = buddy.getPort() + tRec.last = last + tRec.laps = laps.uint + tRec.stop = stopOk + buddy.traceWrite tRec + + trace "=Pool", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial + + stopOk + + +proc schedPeerTrace*( + buddy: BeaconBuddyRef; + ): Future[Duration] + {.async: (raises: []).} = + ## Similar to `schedDaemonTrace()` + ## + let + ctx = buddy.ctx + noisy = not ctx.hibernate + + var tBeg: TraceSchedPeerBegin + if noisy: + tBeg.init buddy + tBeg.frameID = Opt.some(tBeg.serial) + tBeg.peerIP = buddy.getIP() + tBeg.peerPort = buddy.getPort() + buddy.traceWrite tBeg + + trace "+Peer", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tBeg.serial, frameID=tBeg.frameID.value.idStr, + syncState=tBeg.syncState + + let idleTime = await ctx.trace.backup.schedPeer(buddy) + + if noisy: + var tEnd: TraceSchedPeerEnd + tEnd.init buddy + tEnd.frameID = Opt.some(tBeg.serial) # refers back to `tBeg` capture + tEnd.idleTime = idleTime + buddy.traceWrite tEnd + + trace "-Peer", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tEnd.serial, frameID=tEnd.frameID.value.idStr, + syncState=tBeg.syncState, idleTime=idleTime.toStr + + return idleTime + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/trace/trace_setup/setup_sync.nim b/tools/syncer/trace/trace_setup/setup_sync.nim new file mode 100644 index 0000000000..7617d4d778 --- /dev/null +++ b/tools/syncer/trace/trace_setup/setup_sync.nim @@ -0,0 +1,71 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). 
+# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Overlay handler for trace environment + +{.push raises:[].} + +import + pkg/chronicles, + ../trace_desc, + ./[setup_helpers, setup_write] + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc activateTrace*(ctx: BeaconCtxRef) = + ## Replacement for `activate()` handler which in addition + ## write data to the output stream for tracing. + ## + let hdl = ctx.trace.backup + hdl.activate ctx + + if ctx.hibernate: + var tRec: TraceSyncActvFailed + tRec.init ctx + ctx.traceWrite tRec + + trace "=ActvFailed", serial=tRec.serial + + else: + let chn = ctx.chain + var tRec: TraceSyncActivated + tRec.init ctx + tRec.head = ctx.hdrCache.head + tRec.finHash = chn.finHash + ctx.traceWrite tRec + + trace "=Activated", serial=tRec.serial + + +proc suspendTrace*(ctx: BeaconCtxRef) = + ## Replacement for `suspend()` handler which in addition writes + ## data to the output stream for tracing. 
+ ## + let hdl = ctx.trace.backup + hdl.suspend ctx + + var tRec: TraceSyncHibernated + tRec.init ctx + ctx.traceWrite tRec + + trace "=Suspended", serial=tRec.serial + + let trc = ctx.trace + if not trc.isNil: + trc.stopIfEos(trc) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tools/syncer/trace/trace_setup/setup_write.nim b/tools/syncer/trace/trace_setup/setup_write.nim new file mode 100644 index 0000000000..8577656310 --- /dev/null +++ b/tools/syncer/trace/trace_setup/setup_write.nim @@ -0,0 +1,66 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +{.push raises:[].} + +import + std/streams, + pkg/[chronicles, chronos, json_serialization], + pkg/json_serialization/pkg/results as json_results, + pkg/eth/common/eth_types_json_serialization as json_eth_types, + ../trace_desc + +export + json_eth_types, + json_results, + json_serialization + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc toStream(ctx: BeaconCtxRef; trp: TraceRecType; data: string) = + ## Write tracer data to output stream + let trc = ctx.trace + if trc.isNil: + debug "Trace output stopped while collecting", recType=trp + else: + try: + trc.outStream.writeLine data + trc.outStream.flush() + except CatchableError as e: + warn "Error writing trace data", recType=trp, + recSize=data.len, error=($e.name), msg=e.msg + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc writeValue*( + w: var JsonWriter; + v: chronos.Duration; + ) {.raises: [IOError].} = + ## Json writer mixin avoiding `{"value": NNN}` encapsulation + w.writeValue(cast[uint64](v.nanoseconds)) + +template traceWrite*(dsc: BeaconCtxRef|BeaconBuddyRef; capt: untyped) = + type T = typeof capt + const trp = T.toTraceRecType + when dsc is BeaconCtxRef: + dsc.toStream(trp, Json.encode(JTraceRecord[T](kind: trp, bag: capt))) + else: + dsc.ctx.toStream(trp, Json.encode(JTraceRecord[T](kind: trp, bag: capt))) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------