Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions datajunction-server/datajunction_server/api/graphql/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
)
from datajunction_server.api.graphql.queries.catalogs import list_catalogs
from datajunction_server.api.graphql.queries.collections import list_collections
from datajunction_server.api.graphql.queries.namespaces import list_namespaces
from datajunction_server.api.graphql.queries.dag import (
common_dimensions,
downstream_nodes,
Expand All @@ -41,6 +42,7 @@
DialectInfo,
)
from datajunction_server.api.graphql.scalars.collection import Collection
from datajunction_server.api.graphql.scalars.namespace import Namespace
from datajunction_server.api.graphql.scalars.node import DimensionAttribute, Node
from datajunction_server.api.graphql.scalars.sql import (
GeneratedSQL,
Expand Down Expand Up @@ -194,6 +196,12 @@ class Query:
description="List collections, optionally filtered by creator (for My Workspace).",
)

# Namespace queries
list_namespaces: list[Namespace] = strawberry.field(
resolver=log_resolver(list_namespaces),
description="List all active namespaces with node counts and git configuration.",
)


schema = strawberry.Schema(query=Query)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
Namespace GraphQL queries.
"""

from typing import Optional, Union

from sqlalchemy import func, select
from strawberry.types import Info

from datajunction_server.api.graphql.scalars.namespace import (
GitBranchConfig,
GitRootConfig,
Namespace,
)
from datajunction_server.database.namespace import NodeNamespace
from datajunction_server.database.node import Node


async def list_namespaces(
    *,
    info: Info = None,
) -> list[Namespace]:
    """
    Return every non-deactivated namespace with its node count and git settings.

    The ``git`` value attached to each result is:

    * a ``GitRootConfig`` when the namespace row has ``github_repo_path`` set;
    * a ``GitBranchConfig`` (with the parent's root config embedded) when the row
      has both ``git_branch`` and ``parent_namespace`` set and that parent is
      among the returned rows and itself has ``github_repo_path``;
    * ``None`` in every other case.
    """
    db_session = info.context["session"]  # type: ignore

    # Outer join so namespaces without any nodes still appear (count of 0).
    node_count = func.count(Node.id).label("num_nodes")
    query = (
        select(NodeNamespace, node_count)
        .join(Node, onclause=NodeNamespace.namespace == Node.namespace, isouter=True)
        .where(NodeNamespace.deactivated_at.is_(None))
        .group_by(NodeNamespace.namespace)
    )
    records = (await db_session.execute(query)).all()

    # Index the fetched rows by name so a branch can find its root without
    # issuing another query.
    rows_by_name = {entry.namespace: entry for entry, _ in records}

    results = []
    for current, count in records:
        git_config: Optional[Union[GitRootConfig, GitBranchConfig]] = None
        if current.github_repo_path:
            # The namespace is itself a git root.
            git_config = GitRootConfig(  # type: ignore
                repo=current.github_repo_path,
                path=current.git_path,
                default_branch=current.default_branch,
            )
        elif current.git_branch and current.parent_namespace:
            # The namespace is a branch: only report git info when its parent
            # was fetched above and carries the repository reference.
            root_ns = rows_by_name.get(current.parent_namespace)
            if root_ns and root_ns.github_repo_path:  # pragma: no branch
                embedded_root = GitRootConfig(  # type: ignore
                    repo=root_ns.github_repo_path,
                    path=root_ns.git_path,
                    default_branch=root_ns.default_branch,
                )
                git_config = GitBranchConfig(  # type: ignore
                    branch=current.git_branch,
                    git_only=current.git_only,
                    parent_namespace=current.parent_namespace,
                    root=embedded_root,
                )
        results.append(
            Namespace(  # type: ignore
                namespace=current.namespace,
                num_nodes=count or 0,
                git=git_config,
            ),
        )
    return results
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Namespace GraphQL scalar."""

from typing import Annotated, Optional, Union

import strawberry


@strawberry.type
class GitRootConfig:
    """
    Git configuration for a namespace that is a git root.
    Owns the repo reference and canonical config like defaultBranch.
    """

    # Repository reference; always present on a root config.
    repo: str
    # Optional path associated with the root; may be null.
    path: Optional[str]
    # Optional default branch configured on the root; may be null.
    default_branch: Optional[str]


@strawberry.type
class GitBranchConfig:
    """
    Git configuration for a namespace that is a branch of a git root.
    gitOnly=True means this branch cannot be edited from the UI.
    """

    # Name of the git branch this namespace corresponds to.
    branch: str
    # When True, the branch cannot be edited from the UI (see class docstring).
    git_only: bool
    # Name of the namespace this branch hangs off.
    parent_namespace: str
    # Root configuration embedded inline, so clients get the repo reference
    # from the same object.
    root: GitRootConfig


# GraphQL union named "NamespaceGit": a namespace's git configuration is either
# a GitRootConfig or a GitBranchConfig.
NamespaceGit = Annotated[
    Union[GitRootConfig, GitBranchConfig],
    strawberry.union("NamespaceGit"),
]


@strawberry.type
class Namespace:
    """
    A DJ namespace with node count and git configuration.
    git is null for non-git-associated namespaces.
    Use __typename to distinguish GitRootConfig from GitBranchConfig.
    """

    # Name of the namespace.
    namespace: str
    # Number of nodes in this namespace.
    num_nodes: int
    # Root or branch git configuration, or None when the namespace has none.
    git: Optional[NamespaceGit]
24 changes: 24 additions & 0 deletions datajunction-server/datajunction_server/api/graphql/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,13 @@ type GeneratedSQL {
errors: [DJError!]!
}

type GitBranchConfig {
branch: String!
gitOnly: Boolean!
parentNamespace: String!
root: GitRootConfig!
}

type GitRepositoryInfo {
"""GitHub repository path (e.g., 'owner/repo')"""
repo: String!
Expand All @@ -216,6 +223,12 @@ type GitRepositoryInfo {
gitOnly: Boolean!
}

type GitRootConfig {
repo: String!
path: String
defaultBranch: String
}

"""
The `JSON` scalar type represents JSON values as specified by [ECMA-404](https://ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf).
"""
Expand Down Expand Up @@ -281,6 +294,14 @@ type MetricMetadata {
incompatibleDruidFunctions: [String!]!
}

type Namespace {
namespace: String!
numNodes: Int!
git: NamespaceGit
}

union NamespaceGit = GitRootConfig | GitBranchConfig

type Node {
id: Union!
name: String!
Expand Down Expand Up @@ -609,6 +630,9 @@ type Query {
"""Limit collections"""
limit: Int = 100
): [Collection!]!

"""List all active namespaces with node counts and git configuration."""
listNamespaces: [Namespace!]!
}

type SemanticEntity {
Expand Down
70 changes: 51 additions & 19 deletions datajunction-server/datajunction_server/internal/namespaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,27 +237,19 @@ def get_parent_namespaces(namespace: str):
return [SEPARATOR.join(parts[0:i]) for i in range(len(parts)) if parts[0:i]]


async def get_git_info_for_namespace(
session: AsyncSession,
def resolve_git_info_from_map(
namespace: str,
ns_map: dict,
) -> Optional[dict]:
"""
Return git repository info for a namespace.
Resolve git info for a namespace using a pre-loaded map of NodeNamespace rows.

Algorithm:
1. Batch-load all string ancestors (e.g. ``projectx``, ``projectx.feature_one``
for ``projectx.feature_one.cubes``) and find the nearest one with
``git_branch`` set — that is the branch namespace.
2. If the branch namespace has ``parent_namespace`` set, do one FK hop to
load the git root (which carries ``github_repo_path`` / ``git_path``).
Otherwise, look for ``github_repo_path`` among the string ancestors
(self-contained root case).
The map must contain all string ancestors of ``namespace`` (and their FK-hop
parents if applicable) — callers are responsible for pre-loading these rows.
This is the pure resolution logic extracted from ``get_git_info_for_namespace``
so it can be shared with batch DataLoader callers.
"""
ancestor_names = get_parent_namespaces(namespace) + [namespace]
stmt = select(NodeNamespace).where(NodeNamespace.namespace.in_(ancestor_names))
rows = (await session.execute(stmt)).scalars().all()
ns_map = {ns.namespace: ns for ns in rows}

reversed_names = list(reversed(ancestor_names))

branch_ns = next(
Expand All @@ -267,16 +259,17 @@ async def get_git_info_for_namespace(

# Resolve config_ns: find the git root (has github_repo_path).
# If branch_ns.parent_namespace points outside the string hierarchy (a sibling),
# do one FK hop — that sibling is the git root.
# use the FK-hop parent if it was pre-loaded into ns_map.
# Otherwise, the git root is reachable via string ancestors.
config_ns: Optional[NodeNamespace] = None
if (
branch_ns
and branch_ns.parent_namespace
and branch_ns.parent_namespace not in ns_map
and branch_ns.parent_namespace not in ancestor_names
and branch_ns.parent_namespace in ns_map
):
fk_parent = await session.get(NodeNamespace, branch_ns.parent_namespace)
if fk_parent and fk_parent.github_repo_path:
fk_parent = ns_map[branch_ns.parent_namespace]
if fk_parent.github_repo_path:
config_ns = fk_parent
if not config_ns:
config_ns = next(
Expand Down Expand Up @@ -306,6 +299,45 @@ async def get_git_info_for_namespace(
}


async def get_git_info_for_namespace(
    session: AsyncSession,
    namespace: str,
) -> Optional[dict]:
    """
    Look up the git repository info that applies to ``namespace``.

    Steps:
    1. Load, in one query, the rows for ``namespace`` and all of its string
       ancestors (for ``projectx.feature_one.cubes`` that is ``projectx`` and
       ``projectx.feature_one`` plus the namespace itself).
    2. Walking from the namespace upwards, pick the nearest loaded row that has
       ``git_branch`` set; this is the branch namespace.
    3. If that row's ``parent_namespace`` is not among the loaded rows, fetch it
       with a single primary-key lookup and add it to the map.
    4. Hand the completed map to ``resolve_git_info_from_map``, which performs
       the actual resolution and whose result is returned unchanged.
    """
    lineage = get_parent_namespaces(namespace) + [namespace]
    query = select(NodeNamespace).where(NodeNamespace.namespace.in_(lineage))
    loaded = (await session.execute(query)).scalars().all()
    ns_map = {row.namespace: row for row in loaded}

    # Nearest namespace (self first, then closer ancestors) carrying a git branch.
    branch_ns = None
    for name in reversed(lineage):
        candidate = ns_map.get(name)
        if candidate and candidate.git_branch:
            branch_ns = candidate
            break

    # FK hop: the branch may point at a parent outside the string hierarchy,
    # in which case it was not covered by the batch query above.
    parent_name = branch_ns.parent_namespace if branch_ns else None
    if parent_name and parent_name not in ns_map:
        fk_parent = await session.get(NodeNamespace, parent_name)
        if fk_parent:  # pragma: no branch
            ns_map[fk_parent.namespace] = fk_parent

    return resolve_git_info_from_map(namespace, ns_map)


async def create_namespace(
session: AsyncSession,
namespace: str,
Expand Down
Loading
Loading