Skip to content
13 changes: 10 additions & 3 deletions crates/ruff_db/src/system/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -723,10 +723,11 @@ impl ruff_cache::CacheKey for SystemPathBuf {

/// A slice of a virtual path on [`System`](super::System) (akin to [`str`]).
#[repr(transparent)]
#[derive(Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct SystemVirtualPath(str);

impl SystemVirtualPath {
pub fn new(path: &str) -> &SystemVirtualPath {
pub const fn new(path: &str) -> &SystemVirtualPath {
// SAFETY: SystemVirtualPath is marked as #[repr(transparent)] so the conversion from a
// *const str to a *const SystemVirtualPath is valid.
unsafe { &*(path as *const str as *const SystemVirtualPath) }
Expand Down Expand Up @@ -767,8 +768,8 @@ pub struct SystemVirtualPathBuf(String);

impl SystemVirtualPathBuf {
#[inline]
pub fn as_path(&self) -> &SystemVirtualPath {
SystemVirtualPath::new(&self.0)
pub const fn as_path(&self) -> &SystemVirtualPath {
SystemVirtualPath::new(self.0.as_str())
}
}

Expand Down Expand Up @@ -852,6 +853,12 @@ impl ruff_cache::CacheKey for SystemVirtualPathBuf {
}
}

impl Borrow<SystemVirtualPath> for SystemVirtualPathBuf {
fn borrow(&self) -> &SystemVirtualPath {
self.as_path()
}
}
Comment on lines +856 to +860
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is so that we can use SystemVirtualPath as key when calling dict.get


/// Deduplicates identical paths and removes nested paths.
///
/// # Examples
Expand Down
90 changes: 62 additions & 28 deletions crates/ty_server/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use lsp_types::{PositionEncodingKind, Url};
use crate::system::AnySystemPath;
pub use notebook::NotebookDocument;
pub(crate) use range::{FileRangeExt, PositionExt, RangeExt, TextSizeExt, ToRangeExt};
use ruff_db::system::{SystemPathBuf, SystemVirtualPath};
pub(crate) use text_document::DocumentVersion;
pub use text_document::TextDocument;

Expand Down Expand Up @@ -41,51 +42,84 @@ impl From<PositionEncoding> for ruff_source_file::PositionEncoding {

/// A unique document ID, derived from a URL passed as part of an LSP request.
/// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook.
#[derive(Clone, Debug)]
pub(crate) enum DocumentKey {
Notebook(AnySystemPath),
NotebookCell {
cell_url: Url,
notebook_path: AnySystemPath,
},
Text(AnySystemPath),
///
/// The `DocumentKey` is very similar to `AnySystemPath`. The important distinction is that
/// ty doesn't know about individual notebook cells, instead, ty operates on full notebook documents.
/// ty also doesn't support resolving settings per cell, instead, settings are resolved per file or notebook.
///
/// Thus, the motivation of `DocumentKey` is to prevent accidental use of Cell keys for operations
/// that expect to work on a file path level. That's what [`DocumentHandle::to_file_path`]
/// is for, it returns a file path for any document, taking into account that these methods should
/// return the notebook for cell documents and notebooks.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub(super) enum DocumentKey {
/// A URI using the `file` schema and maps to a valid path.
File(SystemPathBuf),

/// Any other URI.
///
/// Used for Notebook-cells, URI's with non-`file` schemes, or invalid `file` URI's.
Opaque(String),
}

impl DocumentKey {
/// Returns the file path associated with the key.
pub(crate) fn path(&self) -> &AnySystemPath {
match self {
DocumentKey::Notebook(path) | DocumentKey::Text(path) => path,
DocumentKey::NotebookCell { notebook_path, .. } => notebook_path,
/// Converts the given [`Url`] to an [`DocumentKey`].
///
/// If the URL scheme is `file`, then the path is converted to a [`SystemPathBuf`] unless
/// the url isn't a valid file path.
///
/// In all other cases, the URL is kept as an opaque identifier ([`Self::Opaque`]).
pub(crate) fn from_url(url: &Url) -> Self {
if url.scheme() == "file" {
if let Ok(path) = url.to_file_path() {
Self::File(SystemPathBuf::from_path_buf(path).expect("URL to be valid UTF-8"))
} else {
tracing::warn!(
"Treating `file:` url `{url}` as opaque URL as it isn't a valid file path"
);
Self::Opaque(url.to_string())
}
} else {
Self::Opaque(url.to_string())
}
}

pub(crate) fn from_path(path: AnySystemPath) -> Self {
// For text documents, we assume it's a text document unless it's a notebook file.
match path.extension() {
Some("ipynb") => Self::Notebook(path),
_ => Self::Text(path),
pub(crate) fn as_opaque(&self) -> Option<&str> {
match self {
Self::Opaque(uri) => Some(uri),
Self::File(_) => None,
}
}

/// Returns the URL for this document key. For notebook cells, returns the cell URL.
/// For other document types, converts the path to a URL.
pub(crate) fn to_url(&self) -> Option<Url> {
/// Returns the corresponding [`AnySystemPath`] for this document key.
///
/// Note, calling this method on a `DocumentKey::Opaque` representing a cell document
/// will return a `SystemVirtualPath` corresponding to the cell URI but not the notebook file path.
/// That's most likely not what you want.
pub(super) fn to_file_path(&self) -> AnySystemPath {
match self {
DocumentKey::NotebookCell { cell_url, .. } => Some(cell_url.clone()),
DocumentKey::Notebook(path) | DocumentKey::Text(path) => path.to_url(),
Self::File(path) => AnySystemPath::System(path.clone()),
Self::Opaque(uri) => {
AnySystemPath::SystemVirtual(SystemVirtualPath::new(uri).to_path_buf())
}
}
}
}

impl From<AnySystemPath> for DocumentKey {
fn from(value: AnySystemPath) -> Self {
match value {
AnySystemPath::System(system_path) => Self::File(system_path),
AnySystemPath::SystemVirtual(virtual_path) => Self::Opaque(virtual_path.to_string()),
}
}
}

impl std::fmt::Display for DocumentKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::NotebookCell { cell_url, .. } => cell_url.fmt(f),
Self::Notebook(path) | Self::Text(path) => match path {
AnySystemPath::System(system_path) => system_path.fmt(f),
AnySystemPath::SystemVirtual(virtual_path) => virtual_path.fmt(f),
},
Self::File(path) => path.fmt(f),
Self::Opaque(uri) => uri.fmt(f),
}
}
}
Expand Down
102 changes: 72 additions & 30 deletions crates/ty_server/src/document/notebook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,59 +3,80 @@ use lsp_types::NotebookCellKind;
use ruff_notebook::CellMetadata;
use rustc_hash::{FxBuildHasher, FxHashMap};

use crate::{PositionEncoding, TextDocument};

use super::DocumentVersion;
use crate::{PositionEncoding, TextDocument};

pub(super) type CellId = usize;

/// The state of a notebook document in the server. Contains an array of cells whose
/// contents are internally represented by [`TextDocument`]s.
#[derive(Clone, Debug)]
pub struct NotebookDocument {
url: lsp_types::Url,
cells: Vec<NotebookCell>,
metadata: ruff_notebook::RawNotebookMetadata,
version: DocumentVersion,
// Used to quickly find the index of a cell for a given URL.
cell_index: FxHashMap<lsp_types::Url, CellId>,
cell_index: FxHashMap<String, CellId>,
}

/// A single cell within a notebook, which has text contents represented as a `TextDocument`.
#[derive(Clone, Debug)]
struct NotebookCell {
/// The URL uniquely identifying the cell.
///
/// > Cell text documents have a URI, but servers should not rely on any
/// > format for this URI, since it is up to the client on how it will
/// > create these URIs. The URIs must be unique across ALL notebook
/// > cells and can therefore be used to uniquely identify a notebook cell
/// > or the cell’s text document.
/// > <https://microsoft.github.io/language-server-protocol/specifications/lsp/3.18/specification/#notebookDocument_synchronization>
url: lsp_types::Url,
kind: NotebookCellKind,
document: TextDocument,
}

impl NotebookDocument {
pub fn new(
version: DocumentVersion,
url: lsp_types::Url,
notebook_version: DocumentVersion,
cells: Vec<lsp_types::NotebookCell>,
metadata: serde_json::Map<String, serde_json::Value>,
cell_documents: Vec<lsp_types::TextDocumentItem>,
) -> crate::Result<Self> {
let mut cell_contents: FxHashMap<_, _> = cell_documents
.into_iter()
.map(|document| (document.uri, document.text))
.collect();

let cells: Vec<_> = cells
.into_iter()
.map(|cell| {
let contents = cell_contents.remove(&cell.document).unwrap_or_default();
NotebookCell::new(cell, contents, version)
})
.collect();
let mut cells: Vec<_> = cells.into_iter().map(NotebookCell::empty).collect();

let cell_index = Self::make_cell_index(&cells);

for cell_document in cell_documents {
let index = cell_index
.get(cell_document.uri.as_str())
.copied()
.ok_or_else(|| {
anyhow::anyhow!(
"Received content for cell `{}` that isn't present in the metadata",
cell_document.uri
)
})?;

cells[index].document =
TextDocument::new(cell_document.uri, cell_document.text, cell_document.version)
.with_language_id(&cell_document.language_id);
}

Ok(Self {
version,
cell_index: Self::make_cell_index(cells.as_slice()),
metadata: serde_json::from_value(serde_json::Value::Object(metadata))?,
url,
version: notebook_version,
cell_index,
cells,
metadata: serde_json::from_value(serde_json::Value::Object(metadata))?,
})
}

pub(crate) fn url(&self) -> &lsp_types::Url {
&self.url
}

/// Generates a pseudo-representation of a notebook that lacks per-cell metadata and contextual information
/// but should still work with Ruff's linter.
pub fn make_ruff_notebook(&self) -> ruff_notebook::Notebook {
Expand Down Expand Up @@ -127,7 +148,7 @@ impl NotebookDocument {
// First, delete the cells and remove them from the index.
if delete > 0 {
for cell in self.cells.drain(start..start + delete) {
self.cell_index.remove(&cell.url);
self.cell_index.remove(cell.url.as_str());
deleted_cells.insert(cell.url, cell.document);
}
}
Expand All @@ -150,16 +171,17 @@ impl NotebookDocument {
// Third, register the new cells in the index and update existing ones that came
// after the insertion.
for (index, cell) in self.cells.iter().enumerate().skip(start) {
self.cell_index.insert(cell.url.clone(), index);
self.cell_index.insert(cell.url.to_string(), index);
}

// Finally, update the text document that represents the cell with the actual
// contents. This should be done at the end so that both the `cells` and
// `cell_index` are updated before we start applying the changes to the cells.
if let Some(did_open) = structure.did_open {
for cell_text_document in did_open {
if let Some(cell) = self.cell_by_uri_mut(&cell_text_document.uri) {
if let Some(cell) = self.cell_by_uri_mut(cell_text_document.uri.as_str()) {
cell.document = TextDocument::new(
cell_text_document.uri,
cell_text_document.text,
cell_text_document.version,
);
Expand All @@ -170,15 +192,15 @@ impl NotebookDocument {

if let Some(cell_data) = data {
for cell in cell_data {
if let Some(existing_cell) = self.cell_by_uri_mut(&cell.document) {
if let Some(existing_cell) = self.cell_by_uri_mut(cell.document.as_str()) {
existing_cell.kind = cell.kind;
}
}
}

if let Some(content_changes) = text_content {
for content_change in content_changes {
if let Some(cell) = self.cell_by_uri_mut(&content_change.document.uri) {
if let Some(cell) = self.cell_by_uri_mut(content_change.document.uri.as_str()) {
cell.document
.apply_changes(content_change.changes, version, encoding);
}
Expand All @@ -204,7 +226,8 @@ impl NotebookDocument {
}

/// Get the text document representing the contents of a cell by the cell URI.
pub(crate) fn cell_document_by_uri(&self, uri: &lsp_types::Url) -> Option<&TextDocument> {
#[expect(unused)]
pub(crate) fn cell_document_by_uri(&self, uri: &str) -> Option<&TextDocument> {
self.cells
.get(*self.cell_index.get(uri)?)
.map(|cell| &cell.document)
Expand All @@ -215,29 +238,41 @@ impl NotebookDocument {
self.cells.iter().map(|cell| &cell.url)
}

fn cell_by_uri_mut(&mut self, uri: &lsp_types::Url) -> Option<&mut NotebookCell> {
fn cell_by_uri_mut(&mut self, uri: &str) -> Option<&mut NotebookCell> {
self.cells.get_mut(*self.cell_index.get(uri)?)
}

fn make_cell_index(cells: &[NotebookCell]) -> FxHashMap<lsp_types::Url, CellId> {
fn make_cell_index(cells: &[NotebookCell]) -> FxHashMap<String, CellId> {
let mut index = FxHashMap::with_capacity_and_hasher(cells.len(), FxBuildHasher);
for (i, cell) in cells.iter().enumerate() {
index.insert(cell.url.clone(), i);
index.insert(cell.url.to_string(), i);
}
index
}
}

impl NotebookCell {
pub(crate) fn empty(cell: lsp_types::NotebookCell) -> Self {
Self {
kind: cell.kind,
document: TextDocument::new(
cell.document.clone(),
String::new(),
DocumentVersion::default(),
),
url: cell.document,
}
}

pub(crate) fn new(
cell: lsp_types::NotebookCell,
contents: String,
version: DocumentVersion,
) -> Self {
Self {
document: TextDocument::new(cell.document.clone(), contents, version),
url: cell.document,
kind: cell.kind,
document: TextDocument::new(contents, version),
}
}
}
Expand Down Expand Up @@ -294,7 +329,14 @@ mod tests {
}
}

NotebookDocument::new(0, cells, serde_json::Map::default(), cell_documents).unwrap()
NotebookDocument::new(
lsp_types::Url::parse("file://test.ipynb").unwrap(),
0,
cells,
serde_json::Map::default(),
cell_documents,
)
.unwrap()
}

/// This test case checks that for a notebook with three code cells, when the client sends a
Expand Down
Loading