Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions src/packages/override.gleam
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/// Reports whether `name` is one of the hard-coded package names listed in
/// the body of this function.
///
/// Returns `True` for a listed name and `False` for every other string. The
/// comparison is an exact, case-sensitive match on the whole name.
pub fn is_ignored_package(name: String) -> Bool {
  case name {
    "bare_package1"
    | "bare_package_one"
    | "bare_package_two"
    | "first_gleam_publish_package"
    | "gleam_module_javascript_test" -> True

    // Reserved official sounding names.
    "gleam"
    | "gleam_deno"
    | "gleam_email"
    | "gleam_html"
    | "gleam_nodejs"
    | "gleam_tcp"
    | "gleam_test"
    | "gleam_toml"
    | "gleam_xml"
    | "gleam_mongo"
    | "gleam_bson"
    | "gleam_file"
    | "gleam_yaml" -> True

    // Unofficial packages impersonating the core team
    "gleam_dotenv" | "gleam_roman" | "gleam_sendgrid" | "gleam_bbmustache" ->
      True

    // Reserved unreleased project names.
    "glitter" | "sequin" -> True

    _ -> False
  }
}

/// Widens a single search word into the list of words that should be looked
/// up for it.
///
/// Some words have common misspellings or closely associated words, so those
/// are searched together. Any other word comes back as a one-element list
/// holding just that word.
pub fn expand_search_term(term: String) -> List(String) {
  case term {
    // One-way expansions: only the word on the left triggers them.
    "luster" -> [term, "lustre"]
    "mail" -> [term, "email"]

    // Two-way pairs: either word yields both, always in the same order.
    "mysql" | "mariadb" -> ["mysql", "mariadb"]
    "postgres" | "postgresql" -> ["postgres", "postgresql"]
    "redis" | "valkey" -> ["redis", "valkey"]
    "regex" | "regexp" -> ["regex", "regexp"]

    _ -> [term]
  }
}

/// Reports whether `name` is one of the six package names this module treats
/// as core: `gleam_stdlib`, `gleam_javascript`, `gleam_erlang`, `gleam_otp`,
/// `gleam_json` and `gleam_time`.
///
/// Returns `False` for every other string.
pub fn is_core_package(name: String) -> Bool {
  case name {
    // Every listed name shares the `gleam_` prefix, so match that first and
    // then compare only the remainder.
    "gleam_" <> suffix ->
      case suffix {
        "stdlib" | "javascript" | "erlang" | "otp" | "json" | "time" -> True
        _ -> False
      }
    _ -> False
  }
}
18 changes: 3 additions & 15 deletions src/packages/router.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import gleam/time/calendar
import gleam/time/timestamp
import gleam/uri
import packages/storage
import packages/text_search
import packages/web.{type Context}
import packages/web/page
import wisp.{type Request, type Response}
Expand Down Expand Up @@ -142,22 +141,11 @@ fn internet_points(ctx: Context) -> Response {
fn search(request: Request, context: Context) -> Response {
let search_term = get_search_parameter(request)
let assert Ok(packages) = case search_term {
"" -> storage.list_packages(context.db)
_ -> text_search.lookup(context.search_index, search_term)
"" -> storage.packages_most_recent_first(context.db)
_ -> storage.search_packages(context.db, context.search_index, search_term)
}
let assert Ok(packages) =
storage.ranked_package_summaries(context.db, packages, search_term)
let packages = case search_term {
"" ->
list.sort(packages, fn(a, b) {
timestamp.compare(b.updated_in_hex_at, a.updated_in_hex_at)
})
_ -> packages
}
let assert Ok(total_package_count) =
storage.get_total_package_count(context.db)

page.packages_list(packages, total_package_count, search_term)
page.packages_list(packages, search_term)
|> wisp.html_response(200)
}

Expand Down
157 changes: 67 additions & 90 deletions src/packages/storage.gleam
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import gleam/bool
import gleam/dict
import gleam/dynamic/decode.{type Decoder}
import gleam/float
Expand All @@ -13,6 +12,8 @@ import gleam/string
import gleam/time/calendar
import gleam/time/timestamp.{type Timestamp}
import packages/error.{type Error}
import packages/override
import packages/text_search
import storail.{type Collection}

pub opaque type Database {
Expand Down Expand Up @@ -53,19 +54,6 @@ pub fn initialise(storage_path: String) -> Database {
Database(hex_sync_times:, packages:, releases:)
}

// Package names that `is_ignored_package` reports as ignored. Entries are
// grouped by the reason given in the comment above each group.
const ignored_packages = [
  "bare_package1",
  "bare_package_one",
  "bare_package_two",
  "first_gleam_publish_package",
  "gleam_module_javascript_test",
  // Reserved official sounding names.
  "gleam",
  "gleam_deno",
  "gleam_email",
  "gleam_html",
  "gleam_nodejs",
  "gleam_tcp",
  "gleam_test",
  "gleam_toml",
  "gleam_xml",
  "gleam_mongo",
  "gleam_bson",
  "gleam_file",
  "gleam_yaml",
  // Unofficial packages impersonating the core team
  "gleam_dotenv",
  "gleam_roman",
  "gleam_sendgrid",
  "gleam_bbmustache",
  // Reserved unreleased project names.
  "glitter",
  "sequin",
]

/// Returns the fixed instant 1_635_092_380 seconds after the Unix epoch
/// (2021-10-24T16:19:40Z) as a `Timestamp`.
///
/// NOTE(review): presumably the earliest moment at which a Gleam package could
/// have been published; confirm against the callers.
fn gleam_package_epoch() -> Timestamp {
  1_635_092_380
  |> timestamp.from_unix_seconds
}
Expand Down Expand Up @@ -305,7 +293,7 @@ pub fn upsert_package_from_hex(
package: hexpm.Package,
latest_version latest_version: String,
) -> Result(Nil, Error) {
case is_ignored_package(package.name) {
case override.is_ignored_package(package.name) {
True -> Ok(Nil)
False -> {
database.packages
Expand All @@ -316,10 +304,6 @@ pub fn upsert_package_from_hex(
}
}

/// Reports whether `name` is equal to any entry of the module-level
/// `ignored_packages` list.
pub fn is_ignored_package(name: String) -> Bool {
  list.any(ignored_packages, fn(ignored) { ignored == name })
}

pub fn get_package(database: Database, name: String) -> Result(Package, Error) {
database.packages
|> storail.key(name)
Expand All @@ -337,13 +321,6 @@ pub fn get_optional_package(
|> result.map_error(error.StorageError)
}

/// Counts the entries that `storail.list` returns for the packages collection.
///
/// Returns `Ok(count)` on success. A failure from `storail.list` is wrapped in
/// `error.StorageError`. Nothing is filtered out here, so names rejected by
/// `is_ignored_package` are counted too if they are present in the collection.
pub fn get_total_package_count(database: Database) -> Result(Int, Error) {
  case storail.list(database.packages, []) {
    Ok(names) -> Ok(list.length(names))
    Error(reason) -> Error(error.StorageError(reason))
  }
}

pub fn upsert_release(
database: Database,
package: String,
Expand Down Expand Up @@ -387,77 +364,69 @@ pub fn list_releases(

pub fn list_packages(database: Database) -> Result(List(String), Error) {
case storail.list(database.packages, []) {
Ok(packages) -> Ok(list.filter(packages, fn(p) { !is_ignored_package(p) }))
Ok(packages) ->
Ok(list.filter(packages, fn(p) { !override.is_ignored_package(p) }))
Error(e) -> Error(error.StorageError(e))
}
}

// Buckets that `ranked_package_summaries` sorts packages into before joining
// them back into one ordered list. Each package is placed in exactly one
// bucket.
type Groups {
Groups(
// Packages whose name equals the search term.
exact: List(Package),
// Packages that matched none of the other three buckets.
regular: List(Package),
// Packages whose latest version starts with "0.".
v0: List(Package),
// Packages last updated in Hex before the Gleam v1 date (2024-03-04 UTC).
old: List(Package),
)
}

pub fn ranked_package_summaries(
database: Database,
packages: List(String),
pub fn search_packages(
db: Database,
search: text_search.TextSearchIndex,
search_term: String,
) -> Result(List(Package), Error) {
let gleam_v1 =
timestamp.from_calendar(
calendar.Date(2024, calendar.March, 4),
calendar.TimeOfDay(0, 0, 0, 0),
calendar.utc_offset,
)

use packages <- result.map({
use name <- list.try_map(packages)
get_package(database, name)
})
let bool = fn(b) {
case b {
True -> 1
False -> 0
}
}
use found <- result.try(text_search.lookup(search, search_term))
use packages <- result.map(
list.try_map(found, fn(found) {
use package <- result.map(get_package(db, found.name))

let exact_package_name_match = bool(search_term == package.name)
let is_not_v0 = bool(!string.starts_with(package.latest_version, "0."))
let is_core_package = bool(override.is_core_package(package.name))
let updated_at =
float.round(timestamp.to_unix_seconds(package.updated_in_hex_at))

// This is the value we use to determine what order packages should be
// shown by. Later list values only take effect if the earlier ones are
// equal.
let ordering_key = [
exact_package_name_match,
is_not_v0,
found.match_count,
is_core_package,
package.downloads_recent,
updated_at,
]
#(ordering_key, package)
}),
)

let groups = Groups([], [], [], [])

let groups =
list.fold(packages, groups, fn(groups, package) {
// The ordering of the clauses matter. Something can be both v0 and old,
// and which group it goes into impacts the final ordering.

use <- bool.lazy_guard(package.name == search_term, fn() {
Groups(..groups, exact: [package, ..groups.exact])
})

let is_old =
timestamp.compare(package.updated_in_hex_at, gleam_v1) == order.Lt
use <- bool.lazy_guard(is_old, fn() {
Groups(..groups, old: [package, ..groups.old])
})

let is_zero_version = string.starts_with(package.latest_version, "0.")
use <- bool.lazy_guard(is_zero_version, fn() {
Groups(..groups, v0: [package, ..groups.v0])
})

Groups(..groups, regular: [package, ..groups.regular])
})

let Groups(exact:, regular:, v0:, old:) = groups
// This list is ordered backwards, so the later in the list the higher it
// will be shown in the UI.
[
// Packages published before Gleam v1.0.0 are likely outdated.
old,
// v0 versions are discouraged, so they are shown lower.
v0,
// Regular versions are not prioritised in any particular way.
regular,
// Exact matches for the search term come first.
exact,
]
|> list.flatten
|> list.reverse
packages
|> list.sort(fn(a, b) { list_compare(b.0, a.0, int.compare) })
|> list.map(fn(pair) { pair.1 })
}

/// Compares two lists element by element, using `compare` on each pair.
///
/// The first pair that is not `order.Eq` decides the result. If one list runs
/// out while all earlier pairs were equal, the shorter list is the lesser one.
/// Two lists of the same length with all pairs equal give `order.Eq`.
fn list_compare(
  a: List(t),
  b: List(t),
  compare: fn(t, t) -> order.Order,
) -> order.Order {
  case a, b {
    [], [] -> order.Eq
    [], [_, ..] -> order.Lt
    [_, ..], [] -> order.Gt
    [head_a, ..rest_a], [head_b, ..rest_b] -> {
      let head_order = compare(head_a, head_b)
      case head_order {
        // Equal heads decide nothing, so carry on with the tails.
        order.Eq -> list_compare(rest_a, rest_b, compare)
        _ -> head_order
      }
    }
  }
}

pub fn try_fold_packages(
Expand Down Expand Up @@ -586,3 +555,11 @@ fn date_string(timestamp: Timestamp) -> String {
|> timestamp.to_rfc3339(calendar.utc_offset)
|> string.slice(0, 10)
}

/// Loads every package named by `list_packages` and returns them sorted by
/// `updated_in_hex_at`, latest first.
///
/// Returns the first error hit, either from `list_packages` or from a
/// `get_package` call for one of the names.
pub fn packages_most_recent_first(db: Database) -> Result(List(Package), Error) {
  // Arguments are swapped in the comparison so later timestamps sort first.
  let newest_first = fn(left: Package, right: Package) {
    timestamp.compare(right.updated_in_hex_at, left.updated_in_hex_at)
  }

  list_packages(db)
  |> result.try(fn(names) {
    list.try_map(names, fn(name) { get_package(db, name) })
  })
  |> result.map(list.sort(_, newest_first))
}
49 changes: 9 additions & 40 deletions src/packages/text_search.gleam
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import ethos.{type BagTable}
import gleam/dict
import gleam/int
import gleam/list
import gleam/option
import gleam/order
import gleam/result
import gleam/string
import packages/error.{type Error}
import packages/storage
import packages/override
import porter_stemmer

pub opaque type TextSearchIndex {
Expand All @@ -23,7 +21,7 @@ pub fn insert(
name name: String,
description description: String,
) -> Result(Nil, Error) {
case storage.is_ignored_package(name) {
case override.is_ignored_package(name) {
True -> Ok(Nil)
False ->
name
Expand All @@ -46,13 +44,15 @@ pub fn update(
insert(index, name, description)
}

/// Find all matches for the given search term. The list is not returned in any
/// order, but each found item is returned with a match count.
pub fn lookup(
index: TextSearchIndex,
phrase: String,
) -> Result(List(String), Error) {
) -> Result(List(Found), Error) {
let phrase = string.lowercase(phrase)
stem_words(phrase)
|> list.flat_map(expand_search_term)
|> list.flat_map(override.expand_search_term)
|> list.try_map(ethos.get(index.table, _))
|> result.map(fn(names) {
names
Expand All @@ -61,44 +61,13 @@ pub fn lookup(
dict.upsert(counters, name, fn(x) { option.unwrap(x, 0) + 1 })
})
|> dict.to_list
|> list.map(fn(pair) {
case pair.0 {
// Rank up prioritised packages
"gleam_stdlib"
| "gleam_javascript"
| "gleam_erlang"
| "gleam_otp"
| "gleam_json"
| "gleam_time" -> #(pair.0, pair.1 + 10)
_ -> pair
}
})
|> list.sort(fn(a, b) {
case a, b {
// Exact matches come first
#(name, _), _ if name == phrase -> order.Lt
_, #(name, _) if name == phrase -> order.Gt
// Otherwise compare the score
_, _ -> int.compare(b.1, a.1)
}
})
|> list.map(fn(pair) { pair.0 })
|> list.map(fn(pair) { Found(pair.0, pair.1) })
})
|> result.replace_error(error.EtsTableError)
}

/// Some words have common misspellings or associated words so we add those to
/// the search to get all appropriate results.
fn expand_search_term(term: String) -> List(String) {
case term {
"postgres" | "postgresql" -> ["postgres", "postgresql"]
"mysql" | "mariadb" -> ["mysql", "mariadb"]
"redis" | "valkey" -> ["redis", "valkey"]
"regex" | "regexp" -> ["regex", "regexp"]
"luster" -> ["luster", "lustre"]
"mail" -> ["mail", "email"]
term -> [term]
}
/// One entry of a `lookup` result.
///
/// `name` is the name stored in the search index. `match_count` is how many
/// times that name occurred across the results fetched for the looked-up
/// terms.
pub type Found {
Found(name: String, match_count: Int)
}

fn remove(index: TextSearchIndex, name: String) -> Result(Nil, Error) {
Expand Down
Loading