Skip to content

Commit e929d88

Browse files
committed
Resync all data periodically
1 parent 8df193a commit e929d88

File tree

4 files changed

+85
-25
lines changed

4 files changed

+85
-25
lines changed

src/packages.gleam

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import wisp/wisp_mist
2323

2424
const usage = "Usage:
2525
gleam run server
26+
gleam run sync
2627
gleam run sync --name PACKAGE_NAME
2728
"
2829

src/packages/storage.gleam

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import storail.{type Collection}
1717

1818
pub opaque type Database {
1919
Database(
20-
most_recent_hex_timestamp: Collection(Timestamp),
20+
hex_sync_times: Collection(Timestamp),
2121
packages: Collection(Package),
2222
releases: Collection(Release),
2323
)
@@ -26,9 +26,9 @@ pub opaque type Database {
2626
pub fn initialise(storage_path: String) -> Database {
2727
let config = storail.Config(storage_path:)
2828

29-
let most_recent_hex_timestamp =
29+
let hex_sync_times =
3030
storail.Collection(
31-
name: "most_recent_hex_timestamp",
31+
name: "hex_sync_times",
3232
to_json: json_timestamp,
3333
decoder: decode.int |> decode.map(timestamp.from_unix_seconds),
3434
config:,
@@ -50,7 +50,7 @@ pub fn initialise(storage_path: String) -> Database {
5050
config:,
5151
)
5252

53-
Database(most_recent_hex_timestamp:, packages:, releases:)
53+
Database(hex_sync_times:, packages:, releases:)
5454
}
5555

5656
const ignored_packages = [
@@ -210,24 +210,39 @@ fn release_decoder() -> Decoder(Release) {
210210
))
211211
}
212212

213-
/// Insert or replace the most recent Hex timestamp in the database.
214-
pub fn upsert_most_recent_hex_timestamp(
213+
pub type WhichHexSyncTime {
214+
PartialSync
215+
FullSync
216+
}
217+
218+
fn which_hex_sync_time_key(which: WhichHexSyncTime) -> String {
219+
case which {
220+
PartialSync -> "most-recent-partial-sync"
221+
FullSync -> "most-recent-full-sync"
222+
}
223+
}
224+
225+
/// Insert or replace a Hex sync timestamp in the database.
226+
pub fn upsert_hex_sync_time(
215227
database: Database,
228+
which: WhichHexSyncTime,
216229
time: Timestamp,
217230
) -> Result(Nil, Error) {
218-
database.most_recent_hex_timestamp
219-
|> storail.key("latest")
231+
database.hex_sync_times
232+
|> storail.key(which_hex_sync_time_key(which))
220233
|> storail.write(time)
221234
|> result.map_error(error.StorageError)
222235
}
223236

224-
/// Get the most recent Hex timestamp from the database, returning the Unix
225-
/// epoch if there is no previous timestamp in the database.
226-
pub fn get_most_recent_hex_timestamp(
237+
/// Get a Hex timestamp from the database, returning a time
238+
/// before the first package publication if there is no previous timestamp in
239+
/// the database.
240+
pub fn get_hex_sync_time(
227241
database: Database,
242+
which: WhichHexSyncTime,
228243
) -> Result(Timestamp, Error) {
229-
database.most_recent_hex_timestamp
230-
|> storail.key("latest")
244+
database.hex_sync_times
245+
|> storail.key(which_hex_sync_time_key(which))
231246
|> storail.optional_read
232247
|> result.map(option.unwrap(_, gleam_package_epoch()))
233248
|> result.map_error(error.StorageError)

src/packages/syncing.gleam

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import gleam/time/duration
1414
import gleam/time/timestamp.{type Timestamp}
1515
import gleam/uri
1616
import packages/error.{type Error}
17-
import packages/storage.{type Database}
17+
import packages/storage.{type Database, FullSync, PartialSync}
1818
import packages/text_search
1919
import wisp
2020

@@ -42,8 +42,26 @@ pub fn sync_new_gleam_releases(
4242
db: Database,
4343
text_search: text_search.TextSearchIndex,
4444
) -> Result(Nil, Error) {
45-
wisp.log_info("Syncing new releases from Hex")
46-
use limit <- try(storage.get_most_recent_hex_timestamp(db))
45+
let now = timestamp.system_time()
46+
use latest_partial <- try(storage.get_hex_sync_time(db, PartialSync))
47+
use latest_full <- try(storage.get_hex_sync_time(db, FullSync))
48+
49+
// Periodically a full-sync is performed, to get the most up-to-date
50+
// information for all packages and releases instead of just those that have
51+
// been published since the last sync.
52+
let next_full_sync_deadline = timestamp.add(latest_full, duration.hours(12))
53+
let #(limit, mode) = case timestamp.compare(now, next_full_sync_deadline) {
54+
order.Gt | order.Eq -> {
55+
wisp.log_info("Performing full Hex sync, getting all data")
56+
#(latest_full, FullSync)
57+
}
58+
order.Lt -> {
59+
wisp.log_info("Performing partial Hex sync, getting newly published data")
60+
#(latest_partial, PartialSync)
61+
}
62+
}
63+
64+
// Perform the sync with the Hex API
4765
use latest <- try(
4866
sync_packages(State(
4967
page: 1,
@@ -55,9 +73,16 @@ pub fn sync_new_gleam_releases(
5573
text_search:,
5674
)),
5775
)
58-
let latest = storage.upsert_most_recent_hex_timestamp(db, latest)
76+
77+
// Record the sync times, so we know where to scan from next time.
78+
use _ <- result.try(storage.upsert_hex_sync_time(db, PartialSync, latest))
79+
use _ <- result.try(case mode {
80+
FullSync -> storage.upsert_hex_sync_time(db, FullSync, latest)
81+
PartialSync -> Ok(Nil)
82+
})
83+
5984
wisp.log_info("Up to date!")
60-
latest
85+
Ok(Nil)
6186
}
6287

6388
pub fn fetch_and_sync_package(

test/packages/storage_test.gleam

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,42 @@ import gleam/option.{None, Some}
44
import gleam/time/calendar
55
import gleam/time/timestamp
66
import packages/error
7-
import packages/storage.{Package, Release}
7+
import packages/storage.{FullSync, Package, PartialSync, Release}
88
import storail
99
import tests
1010

11-
pub fn most_recent_hex_timestamp_test() {
11+
pub fn partial_sync_time_test() {
1212
use db <- tests.with_database
1313

14-
let assert Ok(Nil) =
15-
storage.upsert_most_recent_hex_timestamp(db, timestamp.from_unix_seconds(0))
16-
let assert Ok(time) = storage.get_most_recent_hex_timestamp(db)
14+
let timestamp = timestamp.from_unix_seconds(0)
15+
let assert Ok(Nil) = storage.upsert_hex_sync_time(db, PartialSync, timestamp)
16+
let assert Ok(time) = storage.get_hex_sync_time(db, PartialSync)
1717
let assert #(0, 0) = timestamp.to_unix_seconds_and_nanoseconds(time)
1818
let assert "1970-01-01T00:00:00Z" =
1919
timestamp.to_rfc3339(time, calendar.utc_offset)
2020

2121
let timestamp = timestamp.from_unix_seconds(2_284_352_323)
22-
let assert Ok(Nil) = storage.upsert_most_recent_hex_timestamp(db, timestamp)
23-
let assert Ok(time) = storage.get_most_recent_hex_timestamp(db)
22+
let assert Ok(Nil) = storage.upsert_hex_sync_time(db, PartialSync, timestamp)
23+
let assert Ok(time) = storage.get_hex_sync_time(db, PartialSync)
24+
let assert "2042-05-22T06:18:43Z" =
25+
timestamp.to_rfc3339(time, calendar.utc_offset)
26+
let assert #(2_284_352_323, 0) =
27+
timestamp.to_unix_seconds_and_nanoseconds(time)
28+
}
29+
30+
pub fn full_sync_time_test() {
31+
use db <- tests.with_database
32+
33+
let timestamp = timestamp.from_unix_seconds(0)
34+
let assert Ok(Nil) = storage.upsert_hex_sync_time(db, FullSync, timestamp)
35+
let assert Ok(time) = storage.get_hex_sync_time(db, FullSync)
36+
let assert #(0, 0) = timestamp.to_unix_seconds_and_nanoseconds(time)
37+
let assert "1970-01-01T00:00:00Z" =
38+
timestamp.to_rfc3339(time, calendar.utc_offset)
39+
40+
let timestamp = timestamp.from_unix_seconds(2_284_352_323)
41+
let assert Ok(Nil) = storage.upsert_hex_sync_time(db, FullSync, timestamp)
42+
let assert Ok(time) = storage.get_hex_sync_time(db, FullSync)
2443
let assert "2042-05-22T06:18:43Z" =
2544
timestamp.to_rfc3339(time, calendar.utc_offset)
2645
let assert #(2_284_352_323, 0) =

0 commit comments

Comments
 (0)