diff --git a/src/packages/override.gleam b/src/packages/override.gleam new file mode 100644 index 0000000..469bd79 --- /dev/null +++ b/src/packages/override.gleam @@ -0,0 +1,59 @@ +pub fn is_ignored_package(name: String) -> Bool { + case name { + "bare_package1" + | "bare_package_one" + | "bare_package_two" + | "first_gleam_publish_package" + | "gleam_module_javascript_test" + | // Reserved official sounding names. + "gleam" + | "gleam_deno" + | "gleam_email" + | "gleam_html" + | "gleam_nodejs" + | "gleam_tcp" + | "gleam_test" + | "gleam_toml" + | "gleam_xml" + | "gleam_mongo" + | "gleam_bson" + | "gleam_file" + | "gleam_yaml" + | // Unofficial packages impersonating the core team + "gleam_dotenv" + | "gleam_roman" + | "gleam_sendgrid" + | "gleam_bbmustache" + | // Reserved unreleased project names. + "glitter" + | "sequin" -> True + + _ -> False + } +} + +/// Some words have common misspellings or associated words so we add those to +/// the search to get all appropriate results. +pub fn expand_search_term(term: String) -> List(String) { + case term { + "postgres" | "postgresql" -> ["postgres", "postgresql"] + "mysql" | "mariadb" -> ["mysql", "mariadb"] + "redis" | "valkey" -> ["redis", "valkey"] + "regex" | "regexp" -> ["regex", "regexp"] + "luster" -> ["luster", "lustre"] + "mail" -> ["mail", "email"] + term -> [term] + } +} + +pub fn is_core_package(name: String) -> Bool { + case name { + "gleam_stdlib" + | "gleam_javascript" + | "gleam_erlang" + | "gleam_otp" + | "gleam_json" + | "gleam_time" -> True + _ -> False + } +} diff --git a/src/packages/router.gleam b/src/packages/router.gleam index ea10e42..d429cc9 100644 --- a/src/packages/router.gleam +++ b/src/packages/router.gleam @@ -10,7 +10,6 @@ import gleam/time/calendar import gleam/time/timestamp import gleam/uri import packages/storage -import packages/text_search import packages/web.{type Context} import packages/web/page import wisp.{type Request, type Response} @@ -142,22 +141,11 @@ fn internet_points(ctx: Context) -> Response { fn search(request: Request, context: Context) -> Response { let search_term = get_search_parameter(request) let assert Ok(packages) = case search_term { - "" -> storage.list_packages(context.db) - _ -> text_search.lookup(context.search_index, search_term) + "" -> storage.packages_most_recent_first(context.db) + _ -> storage.search_packages(context.db, context.search_index, search_term) } - let assert Ok(packages) = - storage.ranked_package_summaries(context.db, packages, search_term) - let packages = case search_term { - "" -> - list.sort(packages, fn(a, b) { - timestamp.compare(b.updated_in_hex_at, a.updated_in_hex_at) - }) - _ -> packages - } - let assert Ok(total_package_count) = - storage.get_total_package_count(context.db) - page.packages_list(packages, total_package_count, search_term) + page.packages_list(packages, search_term) |> wisp.html_response(200) } diff --git a/src/packages/storage.gleam b/src/packages/storage.gleam index cf3cf59..f0691ee 100644 --- a/src/packages/storage.gleam +++ b/src/packages/storage.gleam @@ -1,4 +1,3 @@ -import gleam/bool import gleam/dict import gleam/dynamic/decode.{type Decoder} import gleam/float @@ -13,6 +12,8 @@ import gleam/string import gleam/time/calendar import gleam/time/timestamp.{type Timestamp} import packages/error.{type Error} +import packages/override +import packages/text_search import storail.{type Collection} pub opaque type Database { @@ -53,19 +54,6 @@ pub fn initialise(storage_path: String) -> Database { Database(hex_sync_times:, packages:, releases:) } -const ignored_packages = [ - "bare_package1", "bare_package_one", "bare_package_two", - "first_gleam_publish_package", "gleam_module_javascript_test", - // Reserved official sounding names. - "gleam", "gleam_deno", "gleam_email", "gleam_html", "gleam_nodejs", - "gleam_tcp", "gleam_test", "gleam_toml", "gleam_xml", "gleam_mongo", - "gleam_bson", "gleam_file", "gleam_yaml", - // Unofficial packages impersonating the core team - "gleam_dotenv", "gleam_roman", "gleam_sendgrid", "gleam_bbmustache", - // Reserved unreleased project names. - "glitter", "sequin", -] - fn gleam_package_epoch() -> Timestamp { timestamp.from_unix_seconds(1_635_092_380) } @@ -305,7 +293,7 @@ pub fn upsert_package_from_hex( package: hexpm.Package, latest_version latest_version: String, ) -> Result(Nil, Error) { - case is_ignored_package(package.name) { + case override.is_ignored_package(package.name) { True -> Ok(Nil) False -> { database.packages @@ -316,10 +304,6 @@ pub fn upsert_package_from_hex( } } -pub fn is_ignored_package(name: String) -> Bool { - list.contains(ignored_packages, name) -} - pub fn get_package(database: Database, name: String) -> Result(Package, Error) { database.packages |> storail.key(name) @@ -337,13 +321,6 @@ pub fn get_optional_package( |> result.map_error(error.StorageError) } -pub fn get_total_package_count(database: Database) -> Result(Int, Error) { - database.packages - |> storail.list([]) - |> result.map(list.length) - |> result.map_error(error.StorageError) -} - pub fn upsert_release( database: Database, package: String, @@ -387,77 +364,69 @@ pub fn list_releases( pub fn list_packages(database: Database) -> Result(List(String), Error) { case storail.list(database.packages, []) { - Ok(packages) -> Ok(list.filter(packages, fn(p) { !is_ignored_package(p) })) + Ok(packages) -> + Ok(list.filter(packages, fn(p) { !override.is_ignored_package(p) })) Error(e) -> Error(error.StorageError(e)) } } -type Groups { - Groups( - exact: List(Package), - regular: List(Package), - v0: List(Package), - old: List(Package), - ) -} - -pub fn ranked_package_summaries( - database: Database, - packages: List(String), +pub fn search_packages( + db: Database, + search: text_search.TextSearchIndex, search_term: String, ) -> Result(List(Package), Error) { - let gleam_v1 = - timestamp.from_calendar( - calendar.Date(2024, calendar.March, 4), - calendar.TimeOfDay(0, 0, 0, 0), - calendar.utc_offset, - ) - - use packages <- result.map({ - use name <- list.try_map(packages) - get_package(database, name) - }) + let bool = fn(b) { + case b { + True -> 1 + False -> 0 + } + } + use found <- result.try(text_search.lookup(search, search_term)) + use packages <- result.map( + list.try_map(found, fn(found) { + use package <- result.map(get_package(db, found.name)) + + let exact_package_name_match = bool(search_term == package.name) + let is_not_v0 = bool(!string.starts_with(package.latest_version, "0.")) + let is_core_package = bool(override.is_core_package(package.name)) + let updated_at = + float.round(timestamp.to_unix_seconds(package.updated_in_hex_at)) + + // This is the value we use to determine what order packages should be + // shown by. Later list values only take effect if the earlier ones are + // equal. + let ordering_key = [ + exact_package_name_match, + is_not_v0, + found.match_count, + is_core_package, + package.downloads_recent, + updated_at, + ] + #(ordering_key, package) + }), + ) - let groups = Groups([], [], [], []) - - let groups = - list.fold(packages, groups, fn(groups, package) { - // The ordering of the clauses matter. Something can be both v0 and old, - // and which group it goes into impacts the final ordering. - - use <- bool.lazy_guard(package.name == search_term, fn() { - Groups(..groups, exact: [package, ..groups.exact]) - }) - - let is_old = - timestamp.compare(package.updated_in_hex_at, gleam_v1) == order.Lt - use <- bool.lazy_guard(is_old, fn() { - Groups(..groups, old: [package, ..groups.old]) - }) - - let is_zero_version = string.starts_with(package.latest_version, "0.") - use <- bool.lazy_guard(is_zero_version, fn() { - Groups(..groups, v0: [package, ..groups.v0]) - }) - - Groups(..groups, regular: [package, ..groups.regular]) - }) - - let Groups(exact:, regular:, v0:, old:) = groups - // This list is ordered backwards, so the later in the list the higher it - // will be shown in the UI. - [ - // Packages published before Gleam v1.0.0 are likely outdated. - old, - // v0 versions are discouraged, so they are shown lower. - v0, - // Regular versions are not prioritised in any particular way. - regular, - // Exact matches for the search term come first. - exact, - ] - |> list.flatten - |> list.reverse + packages + |> list.sort(fn(a, b) { list_compare(b.0, a.0, int.compare) }) + |> list.map(fn(pair) { pair.1 }) +} + +fn list_compare( + a: List(t), + b: List(t), + compare: fn(t, t) -> order.Order, +) -> order.Order { + case a, b { + [], [] -> order.Eq + [], _ -> order.Lt + _, [] -> order.Gt + [a1, ..a], [b1, ..b] -> + case compare(a1, b1) { + order.Eq -> list_compare(a, b, compare) + order.Gt as order | order.Lt as order -> order + } + } } pub fn try_fold_packages( @@ -586,3 +555,11 @@ fn date_string(timestamp: Timestamp) -> String { |> timestamp.to_rfc3339(calendar.utc_offset) |> string.slice(0, 10) } + +pub fn packages_most_recent_first(db: Database) -> Result(List(Package), Error) { + use packages <- result.try(list_packages(db)) + use packages <- result.map(list.try_map(packages, get_package(db, _))) + list.sort(packages, fn(a, b) { + timestamp.compare(b.updated_in_hex_at, a.updated_in_hex_at) + }) +} diff --git a/src/packages/text_search.gleam b/src/packages/text_search.gleam index 5323d3e..48331bb 100644 --- a/src/packages/text_search.gleam +++ b/src/packages/text_search.gleam @@ -1,13 +1,11 @@ import ethos.{type BagTable} import gleam/dict -import gleam/int import gleam/list import gleam/option -import gleam/order import gleam/result import gleam/string import packages/error.{type Error} -import packages/storage +import packages/override import porter_stemmer pub opaque type TextSearchIndex { @@ -23,7 +21,7 @@ pub fn insert( name name: String, description description: String, ) -> Result(Nil, Error) { - case storage.is_ignored_package(name) { + case override.is_ignored_package(name) { True -> Ok(Nil) False -> name @@ -46,13 +44,15 @@ pub fn update( insert(index, name, description) } +/// Find all matches for the given search term. The list is not returned in any +/// order, but each found item is returned with a match count. pub fn lookup( index: TextSearchIndex, phrase: String, -) -> Result(List(String), Error) { +) -> Result(List(Found), Error) { let phrase = string.lowercase(phrase) stem_words(phrase) - |> list.flat_map(expand_search_term) + |> list.flat_map(override.expand_search_term) |> list.try_map(ethos.get(index.table, _)) |> result.map(fn(names) { names @@ -61,44 +61,13 @@ pub fn lookup( dict.upsert(counters, name, fn(x) { option.unwrap(x, 0) + 1 }) }) |> dict.to_list - |> list.map(fn(pair) { - case pair.0 { - // Rank up proritised packages - "gleam_stdlib" - | "gleam_javascript" - | "gleam_erlang" - | "gleam_otp" - | "gleam_json" - | "gleam_time" -> #(pair.0, pair.1 + 10) - _ -> pair - } - }) - |> list.sort(fn(a, b) { - case a, b { - // Exact matches come first - #(name, _), _ if name == phrase -> order.Lt - _, #(name, _) if name == phrase -> order.Gt - // Otherwise compare the score - _, _ -> int.compare(b.1, a.1) - } - }) - |> list.map(fn(pair) { pair.0 }) + |> list.map(fn(pair) { Found(pair.0, pair.1) }) }) |> result.replace_error(error.EtsTableError) } -/// Some words have common misspellings or associated words so we add those to -/// the search to get all appropriate results. -fn expand_search_term(term: String) -> List(String) { - case term { - "postgres" | "postgresql" -> ["postgres", "postgresql"] - "mysql" | "mariadb" -> ["mysql", "mariadb"] - "redis" | "valkey" -> ["redis", "valkey"] - "regex" | "regexp" -> ["regex", "regexp"] - "luster" -> ["luster", "lustre"] - "mail" -> ["mail", "email"] - term -> [term] - } +pub type Found { + Found(name: String, match_count: Int) } fn remove(index: TextSearchIndex, name: String) -> Result(Nil, Error) { diff --git a/src/packages/web/page.gleam b/src/packages/web/page.gleam index a12e6e9..3c5ccb2 100644 --- a/src/packages/web/page.gleam +++ b/src/packages/web/page.gleam @@ -11,14 +11,10 @@ import lustre/element/html import packages/storage.{type Package} import packages/web/icons -pub fn packages_list( - packages: List(Package), - total_package_count: Int, - search_term: String, -) -> String { +pub fn packages_list(packages: List(Package), search_term: String) -> String { html.div( [attribute.class("content")], - search_aware_package_list(packages, total_package_count, search_term), + search_aware_package_list(packages, search_term), ) |> layout } @@ -105,13 +101,12 @@ fn search_form(search_term: String) -> Element(Nil) { fn search_aware_package_list( packages: List(Package), - total_package_count: Int, search_term: String, ) -> List(Element(Nil)) { let header_phrase = case search_term, packages { "", [] -> "No packages have been added yet" "", [_] -> "1 package is available!" - "", _ -> int.to_string(total_package_count) <> " packages are available!" + "", _ -> int.to_string(list.length(packages)) <> " packages are available!" _, [] -> "No packages match your query" _, [_] -> "1 package matches your query!" diff --git a/test/packages/text_search_test.gleam b/test/packages/text_search_test.gleam index cf410a8..77d301a 100644 --- a/test/packages/text_search_test.gleam +++ b/test/packages/text_search_test.gleam @@ -1,4 +1,4 @@ -import packages/text_search +import packages/text_search.{Found} pub fn lookup_empty_test() { let index = text_search.new() @@ -13,9 +13,9 @@ pub fn lookup_case_test() { let assert Ok(_) = text_search.insert(index, "squirrel", "SQL") let assert Ok(value) = text_search.lookup(index, "HTML") - assert value == ["lustre"] + assert value == [Found("lustre", 1)] let assert Ok(value) = text_search.lookup(index, "html") - assert value == ["lustre"] + assert value == [Found("lustre", 1)] } pub fn lookup_different_case_exact_match_test() { @@ -24,41 +24,41 @@ pub fn lookup_different_case_exact_match_test() { let assert Ok(_) = text_search.insert(index, "blah", "wibble wibble wibble") let assert Ok(value) = text_search.lookup(index, "wibble") - assert value == ["wibble", "blah"] + assert value == [Found("blah", 1), Found("wibble", 1)] let assert Ok(value) = text_search.lookup(index, "WIBBLE") - assert value == ["wibble", "blah"] + assert value == [Found("blah", 1), Found("wibble", 1)] let assert Ok(value) = text_search.lookup(index, "Wibble") - assert value == ["wibble", "blah"] + assert value == [Found("blah", 1), Found("wibble", 1)] } pub fn lookup_ing_test() { let index = text_search.new() let assert Ok(_) = - text_search.insert(index, "lustre", "HTML templates and stuff") + text_search.insert(index, "text", "HTML templates and stuff") let assert Ok(_) = text_search.insert(index, "squirrel", "SQL") let assert Ok(value) = text_search.lookup(index, "templating") - assert value == ["lustre"] + assert value == [Found("text", 1)] } pub fn lookup_er_test() { let index = text_search.new() let assert Ok(_) = - text_search.insert(index, "lustre", "HTML templates and stuff") + text_search.insert(index, "text", "HTML templates and stuff") let assert Ok(_) = text_search.insert(index, "squirrel", "SQL") let assert Ok(value) = text_search.lookup(index, "templater") - assert value == ["lustre"] + assert value == [Found("text", 1)] } pub fn lookup_spaces_test() { let index = text_search.new() let assert Ok(_) = - text_search.insert(index, "lustre", "HTML templates and stuff") + text_search.insert(index, "text", "HTML templates and stuff") let assert Ok(_) = text_search.insert(index, "squirrel", "SQL") let assert Ok(value) = text_search.lookup(index, " html templater ") - assert value == ["lustre"] + assert value == [Found("text", 2)] } pub fn lookup_more_matches_higher_rank_test() { @@ -67,7 +67,7 @@ pub fn lookup_more_matches_higher_rank_test() { let assert Ok(_) = text_search.insert(index, "httpc", "http client") let assert Ok(value) = text_search.lookup(index, "http client") - assert value == ["httpc", "pog"] + assert value == [Found("httpc", 2), Found("pog", 1)] } pub fn ignored_test() { @@ -77,9 +77,9 @@ pub fn ignored_test() { let assert Ok(_) = text_search.insert(index, "gleam_bson", "wibble") let assert Ok(value) = text_search.lookup(index, "gleam_bson") - assert value == ["clean_bson"] + assert value == [Found("clean_bson", 1)] let assert Ok(value) = text_search.lookup(index, "wibble") - assert value == ["clean_bson"] + assert value == [Found("clean_bson", 1)] } pub fn word_in_title_test() { @@ -87,7 +87,7 @@ pub fn word_in_title_test() { let assert Ok(_) = text_search.insert(index, "gleam_regexp", "") let assert Ok(value) = text_search.lookup(index, "regexp") - assert value == ["gleam_regexp"] + assert value == [Found("gleam_regexp", 1)] } // regex also searches for regexp @@ -97,7 +97,7 @@ pub fn extra_regex_test() { let assert Ok(_) = text_search.insert(index, "third_party_regex", "") let assert Ok(value) = text_search.lookup(index, "regex") - assert value == ["gleam_regexp", "third_party_regex"] + assert value == [Found("gleam_regexp", 1), Found("third_party_regex", 1)] } pub fn case_insensitive_test() { @@ -110,25 +110,13 @@ pub fn case_insensitive_test() { ) let assert Ok(value) = text_search.lookup(index, "S3") - assert value == ["bucket"] + assert value == [Found("bucket", 1)] let assert Ok(value) = text_search.lookup(index, "s3") - assert value == ["bucket"] + assert value == [Found("bucket", 1)] let assert Ok(value) = text_search.lookup(index, "gArAgE") - assert value == ["bucket"] -} - -pub fn exact_title_match_goes_first_test() { - let index = text_search.new() - let assert Ok(_) = text_search.insert(index, "lustre_1", "stuff for lustre") - let assert Ok(_) = text_search.insert(index, "lustre_2", "stuff for lustre") - let assert Ok(_) = text_search.insert(index, "lustre", "html stuff") - let assert Ok(_) = text_search.insert(index, "lustre_3", "stuff for lustre") - let assert Ok(_) = text_search.insert(index, "lustre_4", "stuff for lustre") - - let assert Ok(value) = text_search.lookup(index, "lustre") - assert value == ["lustre", "lustre_1", "lustre_2", "lustre_3", "lustre_4"] + assert value == [Found("bucket", 1)] } pub fn translate_from_freedom_language_test() { @@ -142,11 +130,11 @@ pub fn translate_from_freedom_language_test() { // Traditional let assert Ok(value) = text_search.lookup(index, "colour") - assert value == ["gleam_community_colour"] + assert value == [Found("gleam_community_colour", 1)] // USA let assert Ok(value) = text_search.lookup(index, "color") - assert value == ["gleam_community_colour"] + assert value == [Found("gleam_community_colour", 1)] // Irish let assert Ok(value) = text_search.lookup(index, "dath") @@ -160,5 +148,5 @@ pub fn underscores_test() { let assert Ok(_) = text_search.insert(index, "glam", "") let assert Ok(value) = text_search.lookup(index, "lustre_dev") - assert value == ["lustre_dev_tools", "lustre"] + assert value == [Found("lustre", 1), Found("lustre_dev_tools", 2)] }