Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
55c34fe
Add `startMarking()`, `clearMarkings()` & `sweep()` gc helpers
lejeunerenard Feb 12, 2026
c9cef3b
Add storage backing for marks (via simple boolean)
lejeunerenard Feb 12, 2026
6e2ff18
Refactor marks to be bitfield pages
lejeunerenard Feb 13, 2026
005d98a
Check cleared blocks too in large core test for mark & sweep
lejeunerenard Feb 13, 2026
71c1ee5
Refactor createMarkStream into a stream
lejeunerenard Feb 13, 2026
ad09e33
Add tests for `MarkBitfield` & fix default `reverse` in mark stream
lejeunerenard Feb 13, 2026
7f5a42f
Adjust the core length in mark n sweep basic test to hit 2 pages
lejeunerenard Feb 13, 2026
9c1b7f8
Clear pages when clearing the `MarkBitfield`
lejeunerenard Feb 13, 2026
09e83a1
Remove unused `findFirst()` & `findLast()` on `MarkPage`
lejeunerenard Feb 13, 2026
54c5f38
Lint new methods in `index.js`
lejeunerenard Feb 13, 2026
b3ed2c5
Lint `mark-n-sweep.js` test
lejeunerenard Feb 13, 2026
c53c85b
Temporarily use github branch for `hypercore-storage`
lejeunerenard Feb 13, 2026
bf35982
Add tests for mark & sweep on sessions
lejeunerenard Feb 24, 2026
f3b2b0e
Fix non-reverse mark stream & get being first op that loads from storage
lejeunerenard Feb 24, 2026
87a70bc
Rename block marking method to be non-private
lejeunerenard Feb 24, 2026
d42a6a7
Lint tests & `lib/mark-bitfield.js`
lejeunerenard Feb 24, 2026
24afcb4
Fix tests by closing session
lejeunerenard Feb 24, 2026
029a923
Reduce number of appends for mark n sweep large core test
lejeunerenard Feb 24, 2026
76aab78
Document Mark & Sweep in `README.md`
lejeunerenard Feb 24, 2026
29fbfd3
Remove extraneous `return`
lejeunerenard Feb 24, 2026
abb106b
Add minimal example
lejeunerenard Feb 24, 2026
701f233
Add micro "mark & sweep" example to README.md
lejeunerenard Feb 24, 2026
2371145
Remove `TODO`s for using the same transaction for "mark & sweep"
lejeunerenard Feb 24, 2026
15dd4eb
Refactor mark & sweep to a `gc` object API
lejeunerenard Feb 26, 2026
642b742
Lint `lib/mark-n-sweep.js`
lejeunerenard Feb 26, 2026
5bd254c
Add guard against calling `gc()` twice
lejeunerenard Feb 26, 2026
030f9eb
Lint `index.js`
lejeunerenard Feb 26, 2026
d7bc033
Lint `index.js` again
lejeunerenard Feb 26, 2026
c73604e
Revert "Refactor mark & sweep to a `gc` object API"
lejeunerenard Feb 27, 2026
173e093
Refactor `core.markBlock()` to support a range via `start` & `end` args
lejeunerenard Mar 2, 2026
2db7f30
Fix sweeping after not marking throwing
lejeunerenard Mar 5, 2026
194c881
Allow marking blocks on a snapshot, but not sweeping
lejeunerenard Mar 18, 2026
1ed2b39
Throw calling `.startMarking()` on a named session
lejeunerenard Mar 18, 2026
e18f927
Lint `index.js` & `test/mark-n-sweep.js`
lejeunerenard Mar 18, 2026
9d4beb1
Close sessions & snap before test ends
lejeunerenard Mar 19, 2026
b7617ff
Add warning about potential caching pitfall when using mark & sweep
lejeunerenard Mar 19, 2026
3c00719
Unnest warning about caching w/ mark & sweep
lejeunerenard Mar 19, 2026
a42277c
Add assertions blocking atomic sessions from marking as well
lejeunerenard Mar 19, 2026
a6d8b95
Revert temporary github dependency for `hypercore-storage`
lejeunerenard Mar 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,54 @@ Info {
}
```

#### `await core.startMarking()`

This enables marking mode for the "mark & sweep" approach to clear hypercore storage. When called the current markings are cleared.

##### Mark & Sweep

This technique allows for marking blocks that should be kept and assuming all other blocks should be cleared. It can be achieved using the following steps:

1. Enable marking mode via `await core.startMarking()`.
2. Get all blocks that should be kept.
While the marking mode is enabled, all blocks retrieved (via `.get()`, etc) will be "marked". Marked blocks will not be cleared when sweeping.
3. Sweep to clear unmarked blocks via `await core.sweep()`.
Once complete, all blocks that were not marked will be cleared.

> [!CAUTION]
> Be careful that caching does not skip a call to `.get()`.
> For example, `hyperbee` caches the b-tree nodes it looks up, and those
> caches need to be cleared before using mark & sweep.

Example:

```js
await core.startMarking()
await core.get(2)
await core.get(4)
await core.sweep() // All blocks but blocks 2 & 4 are cleared
```

#### `await core.markBlock(start, end = start + 1)`

Manually mark a block or range of blocks to be retained when sweeping. Useful to mark blocks without loading them into memory. `end` is non-inclusive and defaults to `start + 1` so `core.markBlock(index)` only marks the block at `index`.

#### `await core.clearMarkings()`

Manually remove all markings. Automatically called when calling `core.startMarking()`.

#### `await core.sweep(opts)`

Clear all unmarked blocks from storage.

`opts` can include:

```
{
batchSize: 1000 // How frequently to flush clears to storage.
}
```

#### `await core.close([{ error }])`

Fully close this core. Passing an error via `{ error }` is optional and all pending replicator requests will be rejected with the error.
Expand Down
23 changes: 23 additions & 0 deletions examples/mark-n-sweep.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
const Hypercore = require('../')

start()

// Minimal mark & sweep walkthrough: append five blocks, mark two of them by
// reading them while marking mode is on, then sweep and report what is left.
async function start() {
  const core = new Hypercore('./mark-n-sweep')

  // Appended one at a time so the blocks land at indexes 0 through 4
  for (const block of ['block0', 'block1', 'block2', 'block3', 'block4']) {
    await core.append(block)
  }

  // Blocks read between `startMarking()` and `sweep()` are the ones marked
  await core.startMarking()
  for (const index of [2, 4]) {
    await core.get(index)
  }
  await core.sweep()

  const hasFirst = await core.has(0)
  console.log('has(0)', hasFirst) // Prints "has(0) false"
  const hasLast = await core.has(4)
  console.log('has(4)', hasLast) // Prints "has(4) true"

  await core.close()
}
87 changes: 87 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ const id = require('hypercore-id-encoding')
const safetyCatch = require('safety-catch')
const unslab = require('unslab')
const flat = require('flat-tree')
const assert = require('nanoassert')

const { SMALL_WANTS } = require('./lib/feature-flags')
const { UPDATE_COMPAT } = require('./lib/wants')
const MarkBitfield = require('./lib/mark-bitfield')

const inspect = require('./lib/inspect')
const Core = require('./lib/core')
Expand Down Expand Up @@ -92,6 +94,10 @@ class Hypercore extends EventEmitter {

this.waits = 0

// Mark & Sweep GC
this._marking = false
this._marks = null

this._sessionIndex = -1
this._stateIndex = -1 // maintained by session state
this._monitorIndex = -1 // maintained by replication state
Expand Down Expand Up @@ -208,6 +214,8 @@ class Hypercore extends EventEmitter {
const onseq = opts.onseq === undefined ? this.onseq : opts.onseq
const timeout = opts.timeout === undefined ? this.timeout : opts.timeout
const weak = opts.weak === undefined ? this.weak : opts.weak
const marking = this._marking
const marks = this._marks
const Clz = opts.class || Hypercore
const s = new Clz(null, this.key, {
...opts,
Expand All @@ -219,6 +227,8 @@ class Hypercore extends EventEmitter {
weak,
parent: this
})
s._marking = marking
s._marks = marks

return s
}
Expand Down Expand Up @@ -805,6 +815,7 @@ class Hypercore extends EventEmitter {
(opts && opts.valueEncoding && c.from(opts.valueEncoding)) || this.valueEncoding

if (this.onseq !== null) this.onseq(index, this)
if (this._marking) await this.markBlock(index)

const req = this._get(index, opts)

Expand Down Expand Up @@ -915,6 +926,82 @@ class Hypercore extends EventEmitter {
return defaultValue
}

_setupMarks() {
if (this._marks === null) {
const storage = this.snapshotted ? this.core.state.storage : this.state.storage
this._marks = new MarkBitfield(storage)
}
}

async markBlock(start, end = start + 1) {
if (this.opened === false) await this.opening

this._setupMarks()

// TODO support as single rocks batch
const setPromises = []
for (let i = start; i < end; i++) {
setPromises.push(this._marks.set(i, true))
}

return Promise.all(setPromises)
}

async clearMarkings() {
if (this.opened === false) await this.opening

this._setupMarks()

await this._marks.clear()
this._marks = null
}

async startMarking() {
if (this._marking) {
throw ASSERTION("Hypercore cannot be gc'ed when already in gc mode", this.discoveryKey)
}
if (this.state && this.state.name) {
throw ASSERTION("Hypercore cannot be gc'ed when a named session", this.discoveryKey)
}
if (this.state && this.state.storage.atom) {
throw ASSERTION("Hypercore cannot be gc'ed when an atomic session", this.discoveryKey)
}
if (this.opened === false) await this.opening
await this.clearMarkings()

this._marking = true
}

async sweep({ batchSize = 1000 } = {}) {
if (this.opened === false) await this.opening

assert(!this.snapshotted, 'Cannot sweep a snapshot')

// No marks - load from storage
this._setupMarks()

let clearing = []
let prevIndex = this.length
for await (const index of this._marks.createMarkStream({ reverse: true })) {
if (index + 1 === prevIndex) {
prevIndex = index
continue
}
clearing.push(this.clear(index + 1, prevIndex))
if (clearing.length >= batchSize) {
await Promise.all(clearing)
clearing = []
}
prevIndex = index
}
// Clear range from the very start if not marked
if (prevIndex > 0) clearing.push(this.clear(0, prevIndex))
await Promise.all(clearing)

this._marking = false
await this.clearMarkings()
}

createReadStream(opts) {
return new ReadStream(this, opts)
}
Expand Down
108 changes: 108 additions & 0 deletions lib/mark-bitfield.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
const BigSparseArray = require('big-sparse-array')
const { Transform } = require('streamx')
const quickbit = require('./compat').quickbit
const b4a = require('b4a')

const BITS_PER_PAGE = 32768
const BYTES_PER_PAGE = BITS_PER_PAGE / 8

/**
 * A single page of mark bits.
 *
 * The page starts without a bitfield; `loaded` resolves once `setBitfield()`
 * has supplied one. `get()` and `set()` read and write `this.bitfield`
 * directly, so they are only usable after that point.
 */
class MarkPage {
  constructor() {
    this.bitfield = null

    // The promise executor runs synchronously, so the resolver is available
    // as soon as the promise has been constructed.
    let resolveLoaded
    this.loaded = new Promise((resolve) => {
      resolveLoaded = resolve
    })
    this.load = resolveLoaded
  }

  /** Attach the backing buffer and resolve `loaded`. */
  setBitfield(bitfield) {
    this.bitfield = bitfield
    this.load()
  }

  /** Read the bit at `index` within this page. */
  get(index) {
    const bit = quickbit.get(this.bitfield, index)
    return bit
  }

  /** Write `val` to the bit at `index` within this page. */
  set(index, val) {
    quickbit.set(this.bitfield, index, val)
  }
}

module.exports = class MarkBitfield {
static BITS_PER_PAGE = BITS_PER_PAGE
static BYTES_PER_PAGE = BYTES_PER_PAGE

constructor(storage) {
this.storage = storage
this._pages = new BigSparseArray()
}

async loadPage(pageIndex) {
const p = this._pages.set(pageIndex, new MarkPage())
const rx = this.storage.read()
const pageBuf = rx.getMark(pageIndex)
rx.tryFlush()
const bitfield = (await pageBuf) ?? b4a.alloc(BYTES_PER_PAGE)
await p.setBitfield(bitfield)
return p
}

async get(index) {
const j = index & (BITS_PER_PAGE - 1)
const i = (index - j) / BITS_PER_PAGE

let p = this._pages.get(i)
if (!p) p = await this.loadPage(i)

return p.get(j)
}

async set(index, val) {
const j = index & (BITS_PER_PAGE - 1)
const i = (index - j) / BITS_PER_PAGE

let p = this._pages.get(i)

if (!p && val) p = await this.loadPage(i)

if (p) {
await p.loaded
p.set(j, val)
const tx = this.storage.write()
tx.putMark(i, p.bitfield)
await tx.flush()
}
}

async clear() {
const tx = this.storage.write()
tx.deleteMarkRange(0, -1)
await tx.flush()
this._pages = new BigSparseArray()
}

createMarkStream({ reverse = false } = {}) {
return this.storage.createMarkStream({ reverse }).pipe(
new Transform({
transform({ index, page }, cb) {
let bitIndex = reverse
? quickbit.findLast(page, true, BITS_PER_PAGE)
: quickbit.findFirst(page, true, 0)
while (bitIndex !== -1) {
const blockIndex = index * BITS_PER_PAGE + bitIndex
this.push(blockIndex)

// Account for `bitIndex` being either causing infinite loop
if (bitIndex === 0 && reverse) break

bitIndex = reverse
? quickbit.findLast(page, true, bitIndex - 1)
: quickbit.findFirst(page, true, bitIndex + 1)
}

cb(null)
}
})
)
}
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"hypercore-crypto": "^3.2.1",
"hypercore-errors": "^1.5.0",
"hypercore-id-encoding": "^1.2.0",
"hypercore-storage": "^2.0.0",
"hypercore-storage": "^2.8.0",
"is-options": "^1.0.1",
"nanoassert": "^2.0.0",
"protomux": "^3.5.0",
Expand Down
1 change: 1 addition & 0 deletions test/all.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ async function runTests() {
await import('./extension.js')
await import('./fully-remote-proof.js')
await import('./manifest.js')
await import('./mark-n-sweep.js')
await import('./merkle-tree.js')
await import('./merkle-tree-recovery.js')
await import('./move-to.js')
Expand Down
Loading
Loading