Skip to content

Commit 398514a

Browse files
committed
Auto merge of #9125 - ehuss:index-docs, r=alexcrichton
Add some documentation for index and registry stuff. This adds some internal docs for index and registry things. Split out of #9111.
2 parents 3875bbb + 838e538 commit 398514a

File tree

5 files changed

+132
-14
lines changed

5 files changed

+132
-14
lines changed

src/cargo/sources/registry/index.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,28 @@ fn overflow_hyphen() {
164164
)
165165
}
166166

167+
/// Manager for handling the on-disk index.
168+
///
169+
/// Note that local and remote registries store the index differently. Local
170+
/// is a simple on-disk tree of files of the raw index. Remote registries are
171+
/// stored as a raw git repository. The different means of access are handled
172+
/// via the [`RegistryData`] trait abstraction.
173+
///
174+
/// This transparently handles caching of the index in a more efficient format.
167175
pub struct RegistryIndex<'cfg> {
168176
source_id: SourceId,
177+
/// Root directory of the index for the registry.
169178
path: Filesystem,
179+
/// Cache of summary data.
180+
///
181+
/// This is keyed off the package name. The [`Summaries`] value handles
182+
/// loading the summary data. It keeps an optimized on-disk representation
183+
/// of the JSON files, which is created in an as-needed fashion. If it
184+
/// hasn't been cached already, it uses [`RegistryData::load`] to access
185+
/// the JSON files from the index, and then creates the optimized on-disk
186+
/// summary cache.
170187
summaries_cache: HashMap<InternedString, Summaries>,
188+
/// [`Config`] reference for convenience.
171189
config: &'cfg Config,
172190
}
173191

src/cargo/sources/registry/local.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ use std::io::prelude::*;
99
use std::io::SeekFrom;
1010
use std::path::Path;
1111

12+
/// A local registry is a registry that lives on the filesystem as a set of
13+
/// `.crate` files with an `index` directory in the same format as a remote
14+
/// registry.
1215
pub struct LocalRegistry<'cfg> {
1316
index_path: Filesystem,
1417
root: Filesystem,

src/cargo/sources/registry/mod.rs

Lines changed: 106 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@
8585
//! ```
8686
//!
8787
//! The root of the index contains a `config.json` file with a few entries
88-
//! corresponding to the registry (see `RegistryConfig` below).
88+
//! corresponding to the registry (see [`RegistryConfig`] below).
8989
//!
9090
//! Otherwise, there are three numbered directories (1, 2, 3) for crates with
9191
//! names 1, 2, and 3 characters in length. The 1/2 directories simply have the
@@ -189,16 +189,42 @@ const VERSION_TEMPLATE: &str = "{version}";
189189
const PREFIX_TEMPLATE: &str = "{prefix}";
190190
const LOWER_PREFIX_TEMPLATE: &str = "{lowerprefix}";
191191

192+
/// A "source" for a [local](local::LocalRegistry) or
193+
/// [remote](remote::RemoteRegistry) registry.
194+
///
195+
/// This contains common functionality that is shared between the two registry
196+
/// kinds, with the registry-specific logic implemented as part of the
197+
/// [`RegistryData`] trait referenced via the `ops` field.
192198
pub struct RegistrySource<'cfg> {
193199
source_id: SourceId,
200+
/// The path where crate files are extracted (`$CARGO_HOME/registry/src/$REG-HASH`).
194201
src_path: Filesystem,
202+
/// Local reference to [`Config`] for convenience.
195203
config: &'cfg Config,
204+
/// Whether or not the index has been updated.
205+
///
206+
/// This is used as an optimization to avoid updating if not needed, such
207+
/// as when `Cargo.lock` already exists and the index already contains the
208+
/// locked entries. Or, to avoid updating multiple times.
209+
///
210+
/// Only remote registries really need to update. Local registries only
211+
/// check that the index exists.
196212
updated: bool,
213+
/// Abstraction for interfacing to the different registry kinds.
197214
ops: Box<dyn RegistryData + 'cfg>,
215+
/// Interface for managing the on-disk index.
198216
index: index::RegistryIndex<'cfg>,
217+
/// A set of packages that should be allowed to be used, even if they are
218+
/// yanked.
219+
///
220+
/// This is populated from the entries in `Cargo.lock` to ensure that
221+
/// `cargo update -p somepkg` won't unlock yanked entries in `Cargo.lock`.
222+
/// Otherwise, the resolver would think that those entries no longer
223+
/// exist, and it would trigger updates to unrelated packages.
199224
yanked_whitelist: HashSet<PackageId>,
200225
}
201226

227+
/// The `config.json` file stored in the index.
202228
#[derive(Deserialize)]
203229
pub struct RegistryConfig {
204230
/// Download endpoint for all crates.
@@ -278,18 +304,7 @@ fn escaped_char_in_json() {
278304
.unwrap();
279305
}
280306

281-
#[derive(Deserialize)]
282-
#[serde(field_identifier, rename_all = "lowercase")]
283-
enum Field {
284-
Name,
285-
Vers,
286-
Deps,
287-
Features,
288-
Cksum,
289-
Yanked,
290-
Links,
291-
}
292-
307+
/// A dependency as encoded in the index JSON.
293308
#[derive(Deserialize)]
294309
struct RegistryDependency<'a> {
295310
name: InternedString,
@@ -369,30 +384,108 @@ impl<'a> RegistryDependency<'a> {
369384
}
370385
}
371386

387+
/// An abstract interface to handle both a [local](local::LocalRegistry) and
388+
/// [remote](remote::RemoteRegistry) registry.
389+
///
390+
/// This allows [`RegistrySource`] to abstractly handle both registry kinds.
372391
pub trait RegistryData {
392+
/// Performs initialization for the registry.
393+
///
394+
/// This should be safe to call multiple times; the implementation is
395+
/// expected to not do any work if it is already prepared.
373396
fn prepare(&self) -> CargoResult<()>;
397+
398+
/// Returns the path to the index.
399+
///
400+
/// Note that different registries store the index in different formats
401+
/// (remote=git, local=files).
374402
fn index_path(&self) -> &Filesystem;
403+
404+
/// Loads the JSON for a specific named package from the index.
405+
///
406+
/// * `root` is the root path to the index.
407+
/// * `path` is the relative path to the package to load (like `ca/rg/cargo`).
408+
/// * `data` is a callback that will receive the raw bytes of the index JSON file.
375409
fn load(
376410
&self,
377411
root: &Path,
378412
path: &Path,
379413
data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
380414
) -> CargoResult<()>;
415+
416+
/// Loads the `config.json` file and returns it.
417+
///
418+
/// Local registries don't have a config, and return `None`.
381419
fn config(&mut self) -> CargoResult<Option<RegistryConfig>>;
420+
421+
/// Updates the index.
422+
///
423+
/// For a remote registry, this updates the index over the network. Local
424+
/// registries only check that the index exists.
382425
fn update_index(&mut self) -> CargoResult<()>;
426+
427+
/// Prepare to start downloading a `.crate` file.
428+
///
429+
/// Despite the name, this doesn't actually download anything. If the
430+
/// `.crate` is already downloaded, then it returns [`MaybeLock::Ready`].
431+
/// If it hasn't been downloaded, then it returns [`MaybeLock::Download`]
432+
/// which contains the URL to download. The [`crate::core::package::Download`]
433+
/// system handles the actual download process. After downloading, it
434+
/// calls [`RegistryData::finish_download`] to save the downloaded file.
435+
///
436+
/// `checksum` is currently only used by local registries to verify the
437+
/// file contents (because local registries never actually download
438+
/// anything). Remote registries will validate the checksum in
439+
/// `finish_download`. For already downloaded `.crate` files, it does not
440+
/// validate the checksum, assuming the filesystem does not suffer from
441+
/// corruption or manipulation.
383442
fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock>;
443+
444+
/// Finish a download by saving a `.crate` file to disk.
445+
///
446+
/// After [`crate::core::package::Download`] has finished a download,
447+
/// it will call this to save the `.crate` file. This is only relevant
448+
/// for remote registries. This should validate the checksum and save
449+
/// the given data to the on-disk cache.
450+
///
451+
/// Returns a [`File`] handle to the `.crate` file, positioned at the start.
384452
fn finish_download(&mut self, pkg: PackageId, checksum: &str, data: &[u8])
385453
-> CargoResult<File>;
386454

455+
/// Returns whether or not the `.crate` file is already downloaded.
387456
fn is_crate_downloaded(&self, _pkg: PackageId) -> bool {
388457
true
389458
}
459+
460+
/// Validates that the global package cache lock is held.
461+
///
462+
/// Given the [`Filesystem`], this will make sure that the package cache
463+
/// lock is held. If not, it will panic. See
464+
/// [`Config::acquire_package_cache_lock`] for acquiring the global lock.
465+
///
466+
/// Returns the [`Path`] to the [`Filesystem`].
390467
fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path;
468+
469+
/// Returns the current "version" of the index.
470+
///
471+
/// For local registries, this returns `None` because there is no
472+
/// versioning. For remote registries, this returns the SHA hash of the
473+
/// git index on disk (or `None` if the index hasn't been downloaded yet).
474+
///
475+
/// This is used by index caching to check if the cache is out of date.
391476
fn current_version(&self) -> Option<InternedString>;
392477
}
393478

479+
/// The status of [`RegistryData::download`] which indicates if a `.crate`
480+
/// file has already been downloaded, or if not then the URL to download.
394481
pub enum MaybeLock {
482+
/// The `.crate` file is already downloaded. [`File`] is a handle to the
483+
/// opened `.crate` file on the filesystem.
395484
Ready(File),
485+
/// The `.crate` file is not downloaded, here's the URL to download it from.
486+
///
487+
/// `descriptor` is just a text string to display to the user of what is
488+
/// being downloaded.
396489
Download { url: String, descriptor: String },
397490
}
398491

src/cargo/sources/registry/remote.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,12 @@ fn make_dep_prefix(name: &str) -> String {
2929
}
3030
}
3131

32+
/// A remote registry is a registry that lives at a remote URL (such as
33+
/// crates.io). The git index is cloned locally, and `.crate` files are
34+
/// downloaded as needed and cached locally.
3235
pub struct RemoteRegistry<'cfg> {
3336
index_path: Filesystem,
37+
/// Path to the cache of `.crate` files (`$CARGO_HOME/registry/cache/$REG-HASH`).
3438
cache_path: Filesystem,
3539
source_id: SourceId,
3640
index_git_ref: GitReference,

src/cargo/util/toml/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -876,7 +876,7 @@ struct Context<'a, 'b> {
876876
}
877877

878878
impl TomlManifest {
879-
/// Prepares the manfiest for publishing.
879+
/// Prepares the manifest for publishing.
880880
// - Path and git components of dependency specifications are removed.
881881
// - License path is updated to point within the package.
882882
pub fn prepare_for_publish(

0 commit comments

Comments
 (0)