diff --git a/datajunction-server/datajunction_server/api/graphql/main.py b/datajunction-server/datajunction_server/api/graphql/main.py index fa5c0e73e..c66ef07e8 100644 --- a/datajunction-server/datajunction_server/api/graphql/main.py +++ b/datajunction-server/datajunction_server/api/graphql/main.py @@ -19,6 +19,7 @@ ) from datajunction_server.api.graphql.queries.catalogs import list_catalogs from datajunction_server.api.graphql.queries.collections import list_collections +from datajunction_server.api.graphql.queries.namespaces import list_namespaces from datajunction_server.api.graphql.queries.dag import ( common_dimensions, downstream_nodes, @@ -41,6 +42,7 @@ DialectInfo, ) from datajunction_server.api.graphql.scalars.collection import Collection +from datajunction_server.api.graphql.scalars.namespace import Namespace from datajunction_server.api.graphql.scalars.node import DimensionAttribute, Node from datajunction_server.api.graphql.scalars.sql import ( GeneratedSQL, @@ -194,6 +196,12 @@ class Query: description="List collections, optionally filtered by creator (for My Workspace).", ) + # Namespace queries + list_namespaces: list[Namespace] = strawberry.field( + resolver=log_resolver(list_namespaces), + description="List all active namespaces with node counts and git configuration.", + ) + schema = strawberry.Schema(query=Query) diff --git a/datajunction-server/datajunction_server/api/graphql/queries/namespaces.py b/datajunction-server/datajunction_server/api/graphql/queries/namespaces.py new file mode 100644 index 000000000..0c7839247 --- /dev/null +++ b/datajunction-server/datajunction_server/api/graphql/queries/namespaces.py @@ -0,0 +1,72 @@ +""" +Namespace GraphQL queries. +""" + +from typing import Optional, Union + +from sqlalchemy import func, select +from strawberry.types import Info + +from datajunction_server.api.graphql.scalars.namespace import ( + GitBranchConfig, + GitRootConfig, + Namespace, +) +from datajunction_server.database.namespace import NodeNamespace +from datajunction_server.database.node import Node + + +async def list_namespaces( + *, + info: Info = None, +) -> list[Namespace]: + """ + List all active namespaces with node counts and git configuration. + + For git root namespaces, git is a GitRootConfig. + For branch namespaces, git is a GitBranchConfig with the root config embedded. + For non-git namespaces, git is null. + """ + session = info.context["session"] # type: ignore + statement = ( + select(NodeNamespace, func.count(Node.id).label("num_nodes")) + .join(Node, onclause=NodeNamespace.namespace == Node.namespace, isouter=True) + .where(NodeNamespace.deactivated_at.is_(None)) + .group_by(NodeNamespace.namespace) + ) + result = await session.execute(statement) + rows = result.all() + + # Build a map so branch namespaces can resolve their root config inline + ns_map = {ns.namespace: ns for ns, _ in rows} + + namespaces = [] + for ns, num_nodes in rows: + git: Optional[Union[GitRootConfig, GitBranchConfig]] = None + if ns.github_repo_path: + git = GitRootConfig( # type: ignore + repo=ns.github_repo_path, + path=ns.git_path, + default_branch=ns.default_branch, + ) + elif ns.git_branch and ns.parent_namespace: + parent = ns_map.get(ns.parent_namespace) + if parent and parent.github_repo_path: # pragma: no branch + git = GitBranchConfig( # type: ignore + branch=ns.git_branch, + git_only=ns.git_only, + parent_namespace=ns.parent_namespace, + root=GitRootConfig( # type: ignore + repo=parent.github_repo_path, + path=parent.git_path, + default_branch=parent.default_branch, + ), + ) + namespaces.append( + Namespace( # type: ignore + namespace=ns.namespace, + num_nodes=num_nodes or 0, + git=git, + ), + ) + return namespaces diff --git a/datajunction-server/datajunction_server/api/graphql/scalars/namespace.py b/datajunction-server/datajunction_server/api/graphql/scalars/namespace.py new file mode 100644 index 000000000..765ab65e3 --- /dev/null +++ b/datajunction-server/datajunction_server/api/graphql/scalars/namespace.py @@ -0,0 +1,49 @@ +"""Namespace GraphQL scalar.""" + +from typing import Annotated, Optional, Union + +import strawberry + + +@strawberry.type +class GitRootConfig: + """ + Git configuration for a namespace that is a git root. + Owns the repo reference and canonical config like defaultBranch. + """ + + repo: str + path: Optional[str] + default_branch: Optional[str] + + +@strawberry.type +class GitBranchConfig: + """ + Git configuration for a namespace that is a branch of a git root. + gitOnly=True means this branch cannot be edited from the UI. + """ + + branch: str + git_only: bool + parent_namespace: str + root: GitRootConfig + + +NamespaceGit = Annotated[ + Union[GitRootConfig, GitBranchConfig], + strawberry.union("NamespaceGit"), +] + + +@strawberry.type +class Namespace: + """ + A DJ namespace with node count and git configuration. + git is null for non-git-associated namespaces. + Use __typename to distinguish GitRootConfig from GitBranchConfig. + """ + + namespace: str + num_nodes: int + git: Optional[NamespaceGit] diff --git a/datajunction-server/datajunction_server/api/graphql/schema.graphql b/datajunction-server/datajunction_server/api/graphql/schema.graphql index 2e6253a91..28feffa57 100644 --- a/datajunction-server/datajunction_server/api/graphql/schema.graphql +++ b/datajunction-server/datajunction_server/api/graphql/schema.graphql @@ -191,6 +191,13 @@ type GeneratedSQL { errors: [DJError!]! } +type GitBranchConfig { + branch: String! + gitOnly: Boolean! + parentNamespace: String! + root: GitRootConfig! +} + type GitRepositoryInfo { """GitHub repository path (e.g., 'owner/repo')""" repo: String! @@ -216,6 +223,12 @@ type GitRepositoryInfo { gitOnly: Boolean! } +type GitRootConfig { + repo: String! + path: String + defaultBranch: String +} + """ The `JSON` scalar type represents JSON values as specified by [ECMA-404](https://ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf). """ @@ -281,6 +294,14 @@ type MetricMetadata { incompatibleDruidFunctions: [String!]! } +type Namespace { + namespace: String! + numNodes: Int! + git: NamespaceGit +} + +union NamespaceGit = GitRootConfig | GitBranchConfig + type Node { id: Union! name: String! @@ -609,6 +630,9 @@ type Query { """Limit collections""" limit: Int = 100 ): [Collection!]! + + """List all active namespaces with node counts and git configuration.""" + listNamespaces: [Namespace!]! } type SemanticEntity { diff --git a/datajunction-server/datajunction_server/internal/namespaces.py b/datajunction-server/datajunction_server/internal/namespaces.py index eec79456c..1b01d3c6a 100644 --- a/datajunction-server/datajunction_server/internal/namespaces.py +++ b/datajunction-server/datajunction_server/internal/namespaces.py @@ -237,27 +237,19 @@ def get_parent_namespaces(namespace: str): return [SEPARATOR.join(parts[0:i]) for i in range(len(parts)) if parts[0:i]] -async def get_git_info_for_namespace( - session: AsyncSession, +def resolve_git_info_from_map( namespace: str, + ns_map: dict, ) -> Optional[dict]: """ - Return git repository info for a namespace. + Resolve git info for a namespace using a pre-loaded map of NodeNamespace rows. - Algorithm: - 1. Batch-load all string ancestors (e.g. ``projectx``, ``projectx.feature_one`` - for ``projectx.feature_one.cubes``) and find the nearest one with - ``git_branch`` set — that is the branch namespace. - 2. If the branch namespace has ``parent_namespace`` set, do one FK hop to - load the git root (which carries ``github_repo_path`` / ``git_path``). - Otherwise, look for ``github_repo_path`` among the string ancestors - (self-contained root case). + The map must contain all string ancestors of ``namespace`` (and their FK-hop + parents if applicable) — callers are responsible for pre-loading these rows. + This is the pure resolution logic extracted from ``get_git_info_for_namespace`` + so it can be shared with batch DataLoader callers. """ ancestor_names = get_parent_namespaces(namespace) + [namespace] - stmt = select(NodeNamespace).where(NodeNamespace.namespace.in_(ancestor_names)) - rows = (await session.execute(stmt)).scalars().all() - ns_map = {ns.namespace: ns for ns in rows} - reversed_names = list(reversed(ancestor_names)) branch_ns = next( @@ -267,16 +259,17 @@ async def get_git_info_for_namespace( # Resolve config_ns: find the git root (has github_repo_path). # If branch_ns.parent_namespace points outside the string hierarchy (a sibling), - # do one FK hop — that sibling is the git root. + # use the FK-hop parent if it was pre-loaded into ns_map. # Otherwise, the git root is reachable via string ancestors. config_ns: Optional[NodeNamespace] = None if ( branch_ns and branch_ns.parent_namespace - and branch_ns.parent_namespace not in ns_map + and branch_ns.parent_namespace not in ancestor_names + and branch_ns.parent_namespace in ns_map ): - fk_parent = await session.get(NodeNamespace, branch_ns.parent_namespace) - if fk_parent and fk_parent.github_repo_path: + fk_parent = ns_map[branch_ns.parent_namespace] + if fk_parent.github_repo_path: config_ns = fk_parent if not config_ns: config_ns = next( @@ -306,6 +299,45 @@ async def get_git_info_for_namespace( } +async def get_git_info_for_namespace( + session: AsyncSession, + namespace: str, +) -> Optional[dict]: + """ + Return git repository info for a namespace. + + Algorithm: + 1. Batch-load all string ancestors (e.g. ``projectx``, ``projectx.feature_one`` + for ``projectx.feature_one.cubes``) and find the nearest one with + ``git_branch`` set — that is the branch namespace. + 2. If the branch namespace has ``parent_namespace`` set, do one FK hop to + load the git root (which carries ``github_repo_path`` / ``git_path``). + Otherwise, look for ``github_repo_path`` among the string ancestors + (self-contained root case). + """ + ancestor_names = get_parent_namespaces(namespace) + [namespace] + stmt = select(NodeNamespace).where(NodeNamespace.namespace.in_(ancestor_names)) + rows = (await session.execute(stmt)).scalars().all() + ns_map = {ns.namespace: ns for ns in rows} + + # Handle FK hop: if the branch ns points outside the string hierarchy, load it + reversed_names = list(reversed(ancestor_names)) + branch_ns = next( + (ns_map[n] for n in reversed_names if ns_map.get(n) and ns_map[n].git_branch), + None, + ) + if ( + branch_ns + and branch_ns.parent_namespace + and branch_ns.parent_namespace not in ns_map + ): + fk_parent = await session.get(NodeNamespace, branch_ns.parent_namespace) + if fk_parent: # pragma: no branch + ns_map[fk_parent.namespace] = fk_parent + + return resolve_git_info_from_map(namespace, ns_map) + + async def create_namespace( session: AsyncSession, namespace: str, diff --git a/datajunction-server/tests/api/graphql/namespaces_test.py b/datajunction-server/tests/api/graphql/namespaces_test.py new file mode 100644 index 000000000..b7343080f --- /dev/null +++ b/datajunction-server/tests/api/graphql/namespaces_test.py @@ -0,0 +1,125 @@ +""" +Tests for the namespaces GraphQL query. +""" + +import pytest +from httpx import AsyncClient + +NAMESPACES_QUERY = """ +{ + listNamespaces { + namespace + numNodes + git { + __typename + ... on GitRootConfig { + repo + path + defaultBranch + } + ... on GitBranchConfig { + branch + gitOnly + parentNamespace + root { + repo + path + defaultBranch + } + } + } + } +} +""" + + +@pytest.mark.asyncio +async def test_list_namespaces_no_git( + client: AsyncClient, +) -> None: + """ + Non-git namespaces have git=null. + """ + await client.post("/namespaces/foo/") + await client.post("/namespaces/foo.bar/") + + response = await client.post("/graphql", json={"query": NAMESPACES_QUERY}) + assert response.status_code == 200 + data = response.json()["data"]["listNamespaces"] + namespaces = {ns["namespace"]: ns for ns in data} + + assert "foo" in namespaces + assert "foo.bar" in namespaces + assert namespaces["foo"]["numNodes"] == 0 + assert namespaces["foo"]["git"] is None + + +@pytest.mark.asyncio +async def test_list_namespaces_git_root( + client: AsyncClient, +) -> None: + """ + Git root namespaces have git.__typename == GitRootConfig. + """ + await client.post("/namespaces/myproject/") + await client.patch( + "/namespaces/myproject/git", + json={ + "github_repo_path": "owner/repo", + "git_path": "definitions/", + "default_branch": "main", + }, + ) + + response = await client.post("/graphql", json={"query": NAMESPACES_QUERY}) + assert response.status_code == 200 + data = response.json()["data"]["listNamespaces"] + namespaces = {ns["namespace"]: ns for ns in data} + + assert "myproject" in namespaces + ns = namespaces["myproject"] + assert ns["git"]["__typename"] == "GitRootConfig" + assert ns["git"]["repo"] == "owner/repo" + assert ns["git"]["path"] == "definitions/" + assert ns["git"]["defaultBranch"] == "main" + + +@pytest.mark.asyncio +async def test_list_namespaces_git_branch( + client: AsyncClient, +) -> None: + """ + Branch namespaces have git.__typename == GitBranchConfig with root embedded. + """ + await client.post("/namespaces/myproject/") + await client.patch( + "/namespaces/myproject/git", + json={ + "github_repo_path": "owner/repo", + "git_path": "definitions/", + "default_branch": "main", + }, + ) + await client.post("/namespaces/myproject.feature_x/") + await client.patch( + "/namespaces/myproject.feature_x/git", + json={ + "git_branch": "feature-x", + "parent_namespace": "myproject", + }, + ) + + response = await client.post("/graphql", json={"query": NAMESPACES_QUERY}) + assert response.status_code == 200 + data = response.json()["data"]["listNamespaces"] + namespaces = {ns["namespace"]: ns for ns in data} + + assert "myproject.feature_x" in namespaces + ns = namespaces["myproject.feature_x"] + assert ns["git"]["__typename"] == "GitBranchConfig" + assert ns["git"]["branch"] == "feature-x" + assert ns["git"]["gitOnly"] is False + assert ns["git"]["parentNamespace"] == "myproject" + assert ns["git"]["root"]["repo"] == "owner/repo" + assert ns["git"]["root"]["path"] == "definitions/" + assert ns["git"]["root"]["defaultBranch"] == "main" diff --git a/datajunction-ui/src/app/pages/NamespacePage/Explorer.jsx b/datajunction-ui/src/app/pages/NamespacePage/Explorer.jsx index 55e91b32e..fa8530a57 100644 --- a/datajunction-ui/src/app/pages/NamespacePage/Explorer.jsx +++ b/datajunction-ui/src/app/pages/NamespacePage/Explorer.jsx @@ -8,7 +8,7 @@ const Explorer = ({ item = [], current, isTopLevel = false, - namespaceSources = {}, + gitRoots = new Set(), }) => { const djClient = useContext(DJClientContext).DataJunctionAPI; const [items, setItems] = useState([]); @@ -139,50 +139,41 @@ const Explorer = ({ > {items.namespace} - {/* Deployment source badge */} - {namespaceSources[items.path] && - namespaceSources[items.path].total_deployments > 0 && - namespaceSources[items.path].primary_source?.type === 'git' && ( - + - - - - - - - Git - - )} + + + + + + Git + + )}