Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions rust/rubydex/src/indexing/local_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::collections::hash_map::Entry;
use crate::diagnostic::{Diagnostic, Rule};
use crate::model::definitions::Definition;
use crate::model::document::Document;
use crate::model::graph::NameDependent;
use crate::model::identity_maps::IdentityHashMap;
use crate::model::ids::{DefinitionId, NameId, ReferenceId, StringId, UriId};
use crate::model::name::{Name, NameRef};
Expand All @@ -18,6 +19,7 @@ type LocalGraphParts = (
IdentityHashMap<NameId, NameRef>,
IdentityHashMap<ReferenceId, ConstantReference>,
IdentityHashMap<ReferenceId, MethodRef>,
IdentityHashMap<NameId, Vec<NameDependent>>,
);

#[derive(Debug)]
Expand All @@ -29,6 +31,7 @@ pub struct LocalGraph {
names: IdentityHashMap<NameId, NameRef>,
constant_references: IdentityHashMap<ReferenceId, ConstantReference>,
method_references: IdentityHashMap<ReferenceId, MethodRef>,
name_dependents: IdentityHashMap<NameId, Vec<NameDependent>>,
}

impl LocalGraph {
Expand All @@ -42,6 +45,7 @@ impl LocalGraph {
names: IdentityHashMap::default(),
constant_references: IdentityHashMap::default(),
method_references: IdentityHashMap::default(),
name_dependents: IdentityHashMap::default(),
}
}

Expand Down Expand Up @@ -70,6 +74,13 @@ impl LocalGraph {
pub fn add_definition(&mut self, definition: Definition) -> DefinitionId {
let definition_id = definition.id();

if let Some(name_id) = definition.name_id() {
self.name_dependents
.entry(*name_id)
.or_default()
.push(NameDependent::Definition(definition_id));
}

if self.definitions.insert(definition_id, definition).is_some() {
debug_assert!(false, "DefinitionId collision in local graph");
}
Expand Down Expand Up @@ -117,6 +128,18 @@ impl LocalGraph {
entry.get_mut().increment_ref_count(1);
}
Entry::Vacant(entry) => {
if let Some(&parent_scope) = name.parent_scope().as_ref() {
self.name_dependents
.entry(parent_scope)
.or_default()
.push(NameDependent::ChildName(name_id));
}
if let Some(&nesting_id) = name.nesting().as_ref() {
self.name_dependents
.entry(nesting_id)
.or_default()
.push(NameDependent::NestedName(name_id));
}
entry.insert(NameRef::Unresolved(Box::new(name)));
}
}
Expand All @@ -133,6 +156,10 @@ impl LocalGraph {

pub fn add_constant_reference(&mut self, reference: ConstantReference) -> ReferenceId {
let reference_id = reference.id();
self.name_dependents
.entry(*reference.name_id())
.or_default()
.push(NameDependent::Reference(reference_id));

if self.constant_references.insert(reference_id, reference).is_some() {
debug_assert!(false, "ReferenceId collision in local graph");
Expand Down Expand Up @@ -172,6 +199,13 @@ impl LocalGraph {
self.document.add_diagnostic(diagnostic);
}

// Name dependents

/// Returns the reverse index collected for this local graph: for each `NameId`, the
/// `NameDependent` entries (definitions, constant references, child names and nested names)
/// that were recorded against it while definitions, names and references were added.
#[must_use]
pub fn name_dependents(&self) -> &IdentityHashMap<NameId, Vec<NameDependent>> {
&self.name_dependents
}

// Into parts

#[must_use]
Expand All @@ -184,6 +218,7 @@ impl LocalGraph {
self.names,
self.constant_references,
self.method_references,
self.name_dependents,
)
}
}
79 changes: 79 additions & 0 deletions rust/rubydex/src/indexing/ruby_indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5917,3 +5917,82 @@ mod tests {
});
}
}

#[cfg(test)]
mod name_dependent_tests {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not opposed to it, but is it common in Rust to split test groups into different modules?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. ZJIT does this as well. IMO it's a nice way to scope test helpers.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's too bad we didn't start out like that. We should've created modules for indexing each individual type of thing. Same for resolution, all ancestors tests could be separate.

Anyway, not worth the investment to refactor immediately, but something we may want later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened #649 in case anyone wants to give it a try.

use crate::assert_dependents;
use crate::test_utils::LocalGraphTest;

/// Test helper: creates a `LocalGraphTest` from `source`, always using the fixed URI
/// `file:///foo.rb` so individual tests only have to supply the Ruby snippet.
fn index_source(source: &str) -> LocalGraphTest {
LocalGraphTest::new("file:///foo.rb", source)
}

/// Checks the dependents recorded for a snippet that mixes a compact namespace (`Bar::Baz`)
/// with constants referenced at different lexical nesting levels.
#[test]
fn track_dependency_chain() {
let context = index_source(
"
module Bar; end
CONST = 1
CONST2 = CONST

module Foo
class Bar::Baz
CONST
end

CONST2
end
",
);

// `Bar::Baz` uses `Bar` as its parent scope, so `Baz` is expected as a child name of `Bar`.
assert_dependents!(&context, "Bar", [ChildName("Baz")]);
// Names written directly inside `module Foo` are expected with `Foo` as their nesting.
assert_dependents!(&context, "Foo", [NestedName("Baz"), NestedName("CONST2")]);
// `Bar::Baz` is expected to carry its own definition plus the `CONST` name used inside its body.
assert_dependents!(&context, "Bar::Baz", [Definition("Baz"), NestedName("CONST")]);
}

/// Checks that with three lexically nested modules, each inner module is expected as a
/// `NestedName` dependent of the module that directly encloses it.
#[test]
fn multi_level_chain() {
let context = index_source(
"
module Foo
module Bar
module Baz
end
end
end
",
);

// Only the direct enclosing module is asserted for each level: Foo -> Bar, Bar -> Baz.
assert_dependents!(&context, "Foo", [NestedName("Bar")]);
assert_dependents!(&context, "Bar", [NestedName("Baz")]);
}

/// Checks that opening `class << self` inside `Foo` is expected to record the singleton
/// class name `<Foo>` as a `ChildName` dependent of `Foo`.
#[test]
fn singleton_class() {
let context = index_source(
"
class Foo
class << self
def bar; end
end
end
",
);

assert_dependents!(&context, "Foo", [ChildName("<Foo>")]);
}

/// Contrasts the two name-to-name edges inside one module: a lexically nested `class Bar`
/// versus a compact `class Foo::Baz` that names `Foo` explicitly as its parent scope.
#[test]
fn nested_vs_compact() {
let context = index_source(
"
module Foo
class Bar; end
class Foo::Baz; end
end
",
);

// `Bar` is expected as a nested name of `Foo`, while `Foo::Baz` is expected as a child name.
assert_dependents!(&context, "Foo", [NestedName("Bar"), ChildName("Baz")]);
}
}
115 changes: 110 additions & 5 deletions rust/rubydex/src/model/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@ use crate::model::references::{ConstantReference, MethodRef};
use crate::model::string_ref::StringRef;
use crate::stats;

/// An entity whose validity depends on a particular `NameId`.
/// Used as the value type in the `name_dependents` reverse index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NameDependent {
Definition(DefinitionId),
Reference(ReferenceId),
/// This name's `parent_scope` is the key name — structural dependency.
ChildName(NameId),
/// This name's `nesting` is the key name — reference-only dependency.
NestedName(NameId),
Comment on lines +23 to +26
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a logic need for this distinction? Or could we just merge them into Name for dependent names?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the next PR (#641), we will have something like:

NameDependent::ChildName(id) => queue.push(InvalidationItem::UnresolveName(*id)),
NameDependent::NestedName(id) => queue.push(InvalidationItem::UnresolveReferences(*id)),

The main difference is that UnresolveReferences will only unresolve constant references:

class Foo; end

class Bar
  Foo
  class Baz; end
  # Bar has [NestedName(Foo), ChildName(Baz)]
end

When Bar's ancestors change:

  • ChildName(Baz) will trigger a total invalidation on Baz
  • NestedName(Foo) will only invalidate the Foo reference

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the ancestors of Bar change, all constant references inside of the namespace have to be invalidated. I suspect that we can merge these two because the information of what needs to be invalidated is already encoded in the hashmap.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's ok, I'd like to merge this as it is, and see if the invalidation algo looks better/worse without the 2nd enum after some rounds of reviews.

}

pub static BASIC_OBJECT_ID: LazyLock<DeclarationId> = LazyLock::new(|| DeclarationId::from("BasicObject"));
pub static OBJECT_ID: LazyLock<DeclarationId> = LazyLock::new(|| DeclarationId::from("Object"));
pub static MODULE_ID: LazyLock<DeclarationId> = LazyLock::new(|| DeclarationId::from("Module"));
Expand Down Expand Up @@ -41,6 +53,10 @@ pub struct Graph {

/// The position encoding used for LSP line/column locations. Not related to the actual encoding of the file
position_encoding: Encoding,

/// Reverse index: for each `NameId`, which definitions, references, and child/nested names depend on it.
/// Used during invalidation to efficiently find affected entities without scanning the full graph.
name_dependents: IdentityHashMap<NameId, Vec<NameDependent>>,
}

impl Graph {
Expand All @@ -55,6 +71,7 @@ impl Graph {
constant_references: IdentityHashMap::default(),
method_references: IdentityHashMap::default(),
position_encoding: Encoding::default(),
name_dependents: IdentityHashMap::default(),
}
}

Expand Down Expand Up @@ -501,6 +518,11 @@ impl Graph {
&self.names
}

/// Returns the graph-wide reverse index that maps each `NameId` to the `NameDependent`
/// entries (definitions, references, child names and nested names) recorded as depending on it.
#[must_use]
pub fn name_dependents(&self) -> &IdentityHashMap<NameId, Vec<NameDependent>> {
&self.name_dependents
}

/// Converts a `Resolved` `NameRef` back to `Unresolved`, preserving the original `Name` data.
/// Returns the `DeclarationId` it was previously resolved to, if any.
fn unresolve_name(&mut self, name_id: NameId) -> Option<DeclarationId> {
Expand Down Expand Up @@ -533,11 +555,34 @@ impl Graph {
}
}

/// Deletes `name_id` from the graph together with the name-to-name edges tied to it.
///
/// When the name is still present, the `ChildName` edge stored under its parent scope and the
/// `NestedName` edge stored under its nesting are dropped first. The name's own dependents
/// entry and the name itself are removed afterwards, whether or not the name was found.
fn remove_name(&mut self, name_id: NameId) {
    // Copy both ids out of the lookup so the borrow of `self.names` ends before mutating.
    let edges = self.names.get(&name_id).map(|name_ref| {
        (
            name_ref.parent_scope().as_ref().copied(),
            name_ref.nesting().as_ref().copied(),
        )
    });

    if let Some((parent_scope, nesting)) = edges {
        if let Some(parent_id) = parent_scope {
            self.remove_name_dependent(parent_id, NameDependent::ChildName(name_id));
        }
        if let Some(nesting_id) = nesting {
            self.remove_name_dependent(nesting_id, NameDependent::NestedName(name_id));
        }
    }

    self.names.remove(&name_id);
    self.name_dependents.remove(&name_id);
}

/// Drops every occurrence of `dependent` from the list stored under `name_id` in
/// `name_dependents`. A list left empty is removed from the map, and a `name_id` with no
/// entry is a no-op.
fn remove_name_dependent(&mut self, name_id: NameId, dependent: NameDependent) {
    let Some(remaining) = self.name_dependents.get_mut(&name_id) else {
        return;
    };

    remaining.retain(|existing| *existing != dependent);

    // Avoid keeping empty vectors around as map values.
    if remaining.is_empty() {
        self.name_dependents.remove(&name_id);
    }
}

/// Decrements the ref count for a name and removes it if the count reaches zero.
///
/// This does not recursively untrack `parent_scope` or `nesting` names.
Expand Down Expand Up @@ -687,7 +732,7 @@ impl Graph {
/// Merges everything in `other` into this Graph. This method is meant to merge all graph representations from
/// different threads, but not meant to handle updates to the existing global representation
pub fn extend(&mut self, local_graph: LocalGraph) {
let (uri_id, document, definitions, strings, names, constant_references, method_references) =
let (uri_id, document, definitions, strings, names, constant_references, method_references, name_dependents) =
local_graph.into_parts();

if self.documents.insert(uri_id, document).is_some() {
Expand Down Expand Up @@ -735,6 +780,15 @@ impl Graph {
debug_assert!(false, "Method ReferenceId collision in global graph");
}
}

for (name_id, deps) in name_dependents {
let global_deps = self.name_dependents.entry(name_id).or_default();
for dep in deps {
if !global_deps.contains(&dep) {
global_deps.push(dep);
}
}
}
}

/// Updates the global representation with the information contained in `other`, handling deletions, insertions and
Expand Down Expand Up @@ -765,6 +819,7 @@ impl Graph {
self.unresolve_reference(*ref_id);

if let Some(constant_ref) = self.constant_references.remove(ref_id) {
self.remove_name_dependent(*constant_ref.name_id(), NameDependent::Reference(*ref_id));
self.untrack_name(*constant_ref.name_id());
}
}
Expand Down Expand Up @@ -800,8 +855,9 @@ impl Graph {
}
}

if let Some(name_id) = self.definitions.get(def_id).unwrap().name_id() {
self.untrack_name(*name_id);
if let Some(name_id) = self.definitions.get(def_id).unwrap().name_id().copied() {
self.remove_name_dependent(name_id, NameDependent::Definition(*def_id));
self.untrack_name(name_id);
}
}

Expand Down Expand Up @@ -999,7 +1055,7 @@ mod tests {
use crate::model::comment::Comment;
use crate::model::declaration::Ancestors;
use crate::test_utils::GraphTest;
use crate::{assert_descendants, assert_members_eq, assert_no_diagnostics, assert_no_members};
use crate::{assert_dependents, assert_descendants, assert_members_eq, assert_no_diagnostics, assert_no_members};

#[test]
fn deleting_a_uri() {
Expand All @@ -1021,6 +1077,55 @@ mod tests {
);
}

/// Indexes two files that both reopen `module Foo`, then deletes them one at a time and checks
/// that the dependents recorded for `Foo` shrink accordingly until no `Foo` name is left.
#[test]
fn deleting_file_triggers_name_dependent_cleanup() {
let mut context = GraphTest::new();

context.index_uri(
"file:///foo.rb",
"
module Foo
CONST
end
",
);
context.index_uri(
"file:///bar.rb",
"
module Foo
class Bar; end
end
",
);
context.resolve();

// Each file contributes its own `module Foo` definition, hence two `Definition("Foo")`
// entries next to the names nested inside `Foo` (`Bar` from bar.rb, `CONST` from foo.rb).
assert_dependents!(
&context,
"Foo",
[
Definition("Foo"),
Definition("Foo"),
NestedName("Bar"),
NestedName("CONST"),
]
);

// Deleting bar.rb removes Bar's name (and its NestedName edge from Foo)
// and one Definition dependent (bar.rb's `module Foo` definition).
context.delete_uri("file:///bar.rb");
assert_dependents!(&context, "Foo", [Definition("Foo"), NestedName("CONST")]);

// Deleting foo.rb cleans up everything
context.delete_uri("file:///foo.rb");
// Count the remaining names whose string is `Foo`; none should survive both deletions.
let foo_ids = context
.graph()
.names()
.iter()
.filter(|(_, n)| *n.str() == StringId::from("Foo"))
.count();
assert_eq!(foo_ids, 0, "Foo name should be removed after deleting both files");
}

#[test]
fn updating_index_with_deleted_definitions() {
let mut context = GraphTest::new();
Expand Down
Loading
Loading