diff --git a/modules/graph-layers/README.md b/modules/graph-layers/README.md index 400414ae..e7615e4f 100644 --- a/modules/graph-layers/README.md +++ b/modules/graph-layers/README.md @@ -5,3 +5,20 @@ TBD

+
+## GraphML loader
+
+`@deck.gl-community/graph-layers` ships a utility for loading GraphML documents into the
+`TabularGraph` runtime via `loadGraphML`. The loader is designed for GraphML 1.0 documents and
+supports the following constructs:
+
+- `<graph>` elements with `edgedefault` set to either `directed` or `undirected`.
+- `<node>` and `<edge>` elements with required identifiers and edge endpoints.
+- `<key>` declarations scoped to `node`, `edge`, or `all` domains, including `<default>` values.
+- `<data>` entries attached to nodes or edges that reference GraphML keys. Values are converted to
+  numbers or booleans when a key declares `attr.type` of `int`, `long`, `float`, `double`, or
+  `boolean`. Unrecognized types fall back to strings.
+
+The loader intentionally ignores unsupported GraphML features such as hyperedges, ports, or nested
+graphs. Data blocks that contain nested XML are preserved as serialized JSON strings inside the
+resulting attribute map so applications can continue to access vendor-specific payloads.
diff --git a/modules/graph-layers/package.json b/modules/graph-layers/package.json
index 6b776080..54b77e14 100644
--- a/modules/graph-layers/package.json
+++ b/modules/graph-layers/package.json
@@ -56,6 +56,7 @@
     "d3-force": "^3.0.0",
     "d3-format": "^3.1.0",
     "d3-scale": "^4.0.2",
+    "fast-xml-parser": "^4.2.5",
     "global": "^4.4.0",
     "lodash.isequal": "^4.5.0",
     "lodash.pick": "^4.4.0",
diff --git a/modules/graph-layers/src/index.ts b/modules/graph-layers/src/index.ts
index e801908c..0b04a755 100644
--- a/modules/graph-layers/src/index.ts
+++ b/modules/graph-layers/src/index.ts
@@ -97,6 +97,7 @@ export {
   type DotGraphLoaderMetadata,
   type DotGraphLoaderResult
 } from './loaders/dot-graph-loader';
+export {loadGraphML, parseGraphML} from './loaders/graphml-loader';
 
 // utils
 export {mixedGetPosition} from './utils/layer-utils';
diff --git a/modules/graph-layers/src/loaders/graphml-loader.ts b/modules/graph-layers/src/loaders/graphml-loader.ts
new file mode 100644
index 00000000..a6ca7424
--- /dev/null
+++ b/modules/graph-layers/src/loaders/graphml-loader.ts
@@ -0,0 +1,474 @@
+// deck.gl-community
+// SPDX-License-Identifier: MIT
+// Copyright (c) vis.gl contributors
+
+import {XMLParser} from 'fast-xml-parser';
+
+import type {GraphData, GraphEdgeData, GraphNodeData} from '../graph-data/graph-data';
+import {createTabularGraphFromData} from '../graph/create-tabular-graph-from-data';
+import type {TabularGraph} from '../graph/tabular-graph';
+
+const XML_ATTRIBUTE_PREFIX = '@_';
+const XML_TEXT_KEY = '#text';
+
+/** Matches a complete base-10 integer literal: an optional sign followed by digits only. */
+const INTEGER_PATTERN = /^[+-]?\d+$/;
+/** Matches a complete decimal literal with an optional fraction and exponent. */
+const DECIMAL_PATTERN = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;
+
+type GraphMLDomain = 'all' | 'node' | 'edge' | 'graph';
+
+type GraphMLAttributeType = 'boolean' | 'int' | 'long' | 'float' | 'double' | 'string';
+
+/** A `<key>` declaration: attribute name, domain, value type and optional default for a key id. */
+type GraphMLKeyDefinition = {
+  id: string;
+  name: string;
+  domain: GraphMLDomain;
+  type: GraphMLAttributeType;
+  defaultValue?: unknown;
+};
+
+/** A parsed XML element: attributes are stored under '@_'-prefixed keys, text under '#text'. */
+type GraphMLObject = Record<string, unknown> & {
+  [XML_TEXT_KEY]?: string;
+};
+
+// Values are kept as raw strings here; typed conversion is driven by the `<key>` declarations.
+const graphmlParser = new XMLParser({
+  ignoreAttributes: false,
+  attributeNamePrefix: XML_ATTRIBUTE_PREFIX,
+  textNodeName: XML_TEXT_KEY,
+  trimValues: true,
+  parseAttributeValue: false,
+  parseTagValue: false,
+  allowBooleanAttributes: true
+});
+
+/** GraphML source text, or the bytes it is encoded in. */
+export type GraphMLInput = string | ArrayBuffer | Uint8Array;
+
+/**
+ * Parses a GraphML document and builds a graph from the resulting data.
+ *
+ * @param graphml GraphML source text or bytes.
+ * @returns The graph created by `createTabularGraphFromData` from the parsed data.
+ * @throws Error under the same conditions as `parseGraphML`.
+ */
+export function loadGraphML(graphml: GraphMLInput): TabularGraph {
+  return createTabularGraphFromData(parseGraphML(graphml));
+}
+
+/**
+ * Parses a GraphML document into plain graph data.
+ *
+ * Only the first `<graph>` element is read. Nodes without an id and edges without both endpoints
+ * are skipped. Edges without an id receive `edge-<index>`, where the index is the position of
+ * the edge among all `<edge>` elements of that graph.
+ *
+ * @param graphml GraphML source text or bytes.
+ * @returns Graph data holding the parsed nodes and edges.
+ * @throws Error when the input type is unsupported or a `<graphml>` / `<graph>` element is missing.
+ */
+export function parseGraphML(graphml: GraphMLInput): GraphData {
+  const xmlText = decodeGraphML(graphml);
+  const document = graphmlParser.parse(xmlText) as GraphMLObject;
+
+  const graphmlRoot = getGraphMLRoot(document);
+  if (!graphmlRoot) {
+    throw new Error('GraphML document does not contain a <graphml> element.');
+  }
+
+  const graphElement = getGraphElement(graphmlRoot);
+  if (!graphElement) {
+    throw new Error('GraphML document does not contain a <graph> element.');
+  }
+
+  const keyDefinitions = collectKeyDefinitions(graphmlRoot, graphElement);
+  const defaultDirected = parseEdgeDefault(graphElement[`${XML_ATTRIBUTE_PREFIX}edgedefault`]);
+
+  const nodes = normalizeArray(getChild(graphElement, 'node'))
+    .map((node) => parseNode(node, keyDefinitions))
+    .filter((node): node is GraphNodeData => node !== null);
+
+  // Map before filtering so that generated edge ids reflect the position in the document.
+  const edges = normalizeArray(getChild(graphElement, 'edge'))
+    .map((edge, index) => parseEdge(edge, index, keyDefinitions, defaultDirected))
+    .filter((edge): edge is GraphEdgeData => edge !== null);
+
+  return {
+    type: 'graph-data',
+    nodes,
+    edges
+  } satisfies GraphData;
+}
+
+/** Returns the input as XML text, decoding byte inputs with a default `TextDecoder`. */
+function decodeGraphML(graphml: GraphMLInput): string {
+  if (typeof graphml === 'string') {
+    return graphml;
+  }
+
+  // TextDecoder accepts typed arrays and raw buffers alike, so no intermediate view is needed.
+  if (graphml instanceof Uint8Array || graphml instanceof ArrayBuffer) {
+    return new TextDecoder().decode(graphml);
+  }
+
+  throw new Error('Unsupported GraphML input. Expected a string, ArrayBuffer, or Uint8Array.');
+}
+
+/**
+ * Looks up a child element by its local name. The parser keeps namespace prefixes in tag names,
+ * so a prefixed tag such as `g:node` is used when the plain name is absent.
+ */
+function getChild(parent: GraphMLObject, localName: string): unknown {
+  const direct = parent[localName];
+  if (direct !== undefined) {
+    return direct;
+  }
+
+  const suffix = `:${localName}`;
+  // Skip attribute keys: `xmlns:graph="..."` must not be mistaken for a `<graph>` element.
+  const prefixedName = Object.keys(parent).find(
+    (name) => !name.startsWith(XML_ATTRIBUTE_PREFIX) && name.endsWith(suffix)
+  );
+  return prefixedName === undefined ? undefined : parent[prefixedName];
+}
+
+/** Finds the `<graphml>` root element of the parsed document, or null when there is none. */
+function getGraphMLRoot(document: GraphMLObject): GraphMLObject | null {
+  const root = getChild(document, 'graphml');
+  return isObject(root) ? root : null;
+}
+
+/** Returns the first `<graph>` element below the root, or null when there is none. */
+function getGraphElement(graphmlRoot: GraphMLObject): GraphMLObject | null {
+  return normalizeArray(getChild(graphmlRoot, 'graph')).find(isObject) ?? null;
+}
+
+/**
+ * Collects `<key>` declarations from the root and from the graph element, indexed by key id.
+ * Declarations without an id are ignored; a later declaration replaces an earlier one that
+ * uses the same id.
+ */
+function collectKeyDefinitions(
+  graphmlRoot: GraphMLObject,
+  graphElement: GraphMLObject
+): Map<string, GraphMLKeyDefinition> {
+  const keys = new Map<string, GraphMLKeyDefinition>();
+  const candidates = [
+    ...normalizeArray(getChild(graphmlRoot, 'key')),
+    ...normalizeArray(getChild(graphElement, 'key'))
+  ];
+
+  for (const candidate of candidates) {
+    if (!isObject(candidate)) {
+      continue;
+    }
+
+    const id = String(candidate[`${XML_ATTRIBUTE_PREFIX}id`] ?? '').trim();
+    if (!id) {
+      continue;
+    }
+
+    const domain = normalizeDomain(candidate[`${XML_ATTRIBUTE_PREFIX}for`]);
+    // Keys that declare no attr.name are exposed under their id.
+    const name = String(candidate[`${XML_ATTRIBUTE_PREFIX}attr.name`] ?? id).trim();
+    const type = normalizeType(candidate[`${XML_ATTRIBUTE_PREFIX}attr.type`]);
+    const defaultNode = getChild(candidate, 'default');
+    const defaultValue = defaultNode == null ? undefined : castDataValue(defaultNode, type);
+
+    keys.set(id, {id, domain, name, type, defaultValue});
+  }
+
+  return keys;
+}
+
+/** Converts a parsed `<node>` element to node data; returns null when it has no usable id. */
+function parseNode(
+  node: unknown,
+  keyDefinitions: Map<string, GraphMLKeyDefinition>
+): GraphNodeData | null {
+  if (!isObject(node)) {
+    return null;
+  }
+
+  const id = node[`${XML_ATTRIBUTE_PREFIX}id`];
+  if (typeof id !== 'string' && typeof id !== 'number') {
+    return null;
+  }
+
+  const attributes = buildAttributeBag('node', getChild(node, 'data'), keyDefinitions);
+
+  const graphNode: GraphNodeData = {
+    type: 'graph-node-data',
+    id,
+    attributes: Object.keys(attributes).length > 0 ? attributes : undefined
+  };
+
+  // A string attribute named "label" is also surfaced as the node label.
+  const label = attributes.label;
+  if (typeof label === 'string') {
+    graphNode.label = label;
+  }
+
+  return graphNode;
+}
+
+/**
+ * Converts a parsed `<edge>` element to edge data; returns null when an endpoint is missing.
+ *
+ * @param index Position of the edge in the document, used to generate an id when none is given.
+ * @param defaultDirected Direction applied when the edge has no recognizable `directed` value.
+ */
+function parseEdge(
+  edge: unknown,
+  index: number,
+  keyDefinitions: Map<string, GraphMLKeyDefinition>,
+  defaultDirected: boolean
+): GraphEdgeData | null {
+  if (!isObject(edge)) {
+    return null;
+  }
+
+  const sourceId = edge[`${XML_ATTRIBUTE_PREFIX}source`];
+  const targetId = edge[`${XML_ATTRIBUTE_PREFIX}target`];
+  if (typeof sourceId !== 'string' && typeof sourceId !== 'number') {
+    return null;
+  }
+  if (typeof targetId !== 'string' && typeof targetId !== 'number') {
+    return null;
+  }
+
+  const rawId = edge[`${XML_ATTRIBUTE_PREFIX}id`];
+  const id =
+    typeof rawId === 'string' || typeof rawId === 'number' ? rawId : `edge-${index}`;
+  const directed = parseDirected(edge[`${XML_ATTRIBUTE_PREFIX}directed`], defaultDirected);
+  const attributes = buildAttributeBag('edge', getChild(edge, 'data'), keyDefinitions);
+
+  const graphEdge: GraphEdgeData = {
+    type: 'graph-edge-data',
+    id,
+    sourceId,
+    targetId,
+    directed,
+    attributes: Object.keys(attributes).length > 0 ? attributes : undefined
+  };
+
+  // A string attribute named "label" is also surfaced as the edge label.
+  const label = attributes.label;
+  if (typeof label === 'string') {
+    graphEdge.label = label;
+  }
+
+  return graphEdge;
+}
+
+/**
+ * Builds the attribute map of a node or edge: defaults of every key that applies to `domain`
+ * first, then the explicit `<data>` entries, which override defaults of the same name.
+ */
+function buildAttributeBag(
+  domain: GraphMLDomain,
+  data: unknown,
+  keyDefinitions: Map<string, GraphMLKeyDefinition>
+): Record<string, unknown> {
+  const attributes: Record<string, unknown> = {};
+
+  for (const key of keyDefinitions.values()) {
+    const applies = key.domain === 'all' || key.domain === domain;
+    if (applies && key.defaultValue !== undefined) {
+      attributes[key.name] = key.defaultValue;
+    }
+  }
+
+  for (const entry of normalizeArray(data)) {
+    assignAttributeFromDataEntry(entry, keyDefinitions, attributes);
+  }
+
+  return attributes;
+}
+
+/**
+ * Stores the value of one `<data>` entry in `attributes`. Entries that reference an undeclared
+ * key are kept as strings under the key id; entries without a key or content are ignored.
+ */
+function assignAttributeFromDataEntry(
+  entry: unknown,
+  keyDefinitions: Map<string, GraphMLKeyDefinition>,
+  attributes: Record<string, unknown>
+): void {
+  if (!isObject(entry)) {
+    return;
+  }
+
+  const keyId = entry[`${XML_ATTRIBUTE_PREFIX}key`];
+  if (typeof keyId !== 'string') {
+    return;
+  }
+
+  const definition = keyDefinitions.get(keyId);
+  const value = castDataValue(entry, definition?.type ?? 'string');
+  if (value !== undefined) {
+    attributes[definition?.name ?? keyId] = value;
+  }
+}
+
+/**
+ * Converts the content of a `<data>` or `<default>` element to the declared attribute type.
+ * Content that is not a complete literal of that type is returned unchanged as a string, so
+ * malformed values such as `12abc` are never silently truncated.
+ *
+ * @returns The converted value, or undefined when the element has no content.
+ */
+function castDataValue(value: unknown, type: GraphMLAttributeType): unknown {
+  const text = extractTextContent(value);
+  if (text === undefined) {
+    return undefined;
+  }
+
+  const literal = text.trim();
+  switch (type) {
+    case 'boolean':
+      return parseBoolean(literal) ?? text;
+    case 'int':
+    case 'long':
+      // Stored as a JS number, so magnitudes above Number.MAX_SAFE_INTEGER lose precision.
+      return INTEGER_PATTERN.test(literal) ? Number.parseInt(literal, 10) : text;
+    case 'float':
+    case 'double':
+      return DECIMAL_PATTERN.test(literal) ? Number.parseFloat(literal) : text;
+    default:
+      return text;
+  }
+}
+
+/**
+ * Extracts the text of a parsed element. Elements that contain nested XML instead of text are
+ * serialized to JSON without their attributes; for arrays only the first entry is used.
+ */
+function extractTextContent(value: unknown): string | undefined {
+  if (typeof value === 'string') {
+    return value;
+  }
+
+  if (typeof value === 'number' || typeof value === 'boolean') {
+    return String(value);
+  }
+
+  if (Array.isArray(value)) {
+    return extractTextContent(value[0]);
+  }
+
+  if (isObject(value)) {
+    const text = value[XML_TEXT_KEY];
+    if (typeof text === 'string') {
+      return text;
+    }
+
+    const nonAttributeEntries = Object.entries(value).filter(
+      ([key]) => !key.startsWith(XML_ATTRIBUTE_PREFIX)
+    );
+    if (nonAttributeEntries.length === 0) {
+      return undefined;
+    }
+    return JSON.stringify(Object.fromEntries(nonAttributeEntries));
+  }
+
+  return undefined;
+}
+
+/** Wraps a single value in an array; falsy input yields an empty array. */
+function normalizeArray<T>(value: T | T[] | null | undefined): T[] {
+  if (!value) {
+    return [];
+  }
+  return Array.isArray(value) ? value : [value];
+}
+
+/** Type guard for parsed XML elements: any non-null, non-array object. */
+function isObject(value: unknown): value is GraphMLObject {
+  return Boolean(value) && typeof value === 'object' && !Array.isArray(value);
+}
+
+/** Maps the `for` attribute of a key to a domain; missing or unknown values mean 'all'. */
+function normalizeDomain(value: unknown): GraphMLDomain {
+  if (typeof value !== 'string') {
+    return 'all';
+  }
+
+  const lower = value.toLowerCase();
+  if (lower === 'node' || lower === 'edge' || lower === 'graph' || lower === 'all') {
+    return lower;
+  }
+  return 'all';
+}
+
+/** Maps the `attr.type` attribute of a key to a type; missing or unknown values mean 'string'. */
+function normalizeType(value: unknown): GraphMLAttributeType {
+  if (typeof value !== 'string') {
+    return 'string';
+  }
+
+  const lower = value.toLowerCase();
+  if (
+    lower === 'boolean' ||
+    lower === 'int' ||
+    lower === 'long' ||
+    lower === 'float' ||
+    lower === 'double'
+  ) {
+    return lower;
+  }
+  return 'string';
+}
+
+/** Reads `edgedefault`: undirected when missing or 'undirected', directed for any other value. */
+function parseEdgeDefault(value: unknown): boolean {
+  if (typeof value !== 'string') {
+    return false;
+  }
+  return value.toLowerCase() !== 'undirected';
+}
+
+/** Reads the `directed` attribute of an edge, falling back to the graph-level default. */
+function parseDirected(value: unknown, defaultDirected: boolean): boolean {
+  if (typeof value === 'string') {
+    const normalized = value.trim().toLowerCase();
+    if (
+      normalized === 'true' ||
+      normalized === '1' ||
+      normalized === 'yes' ||
+      normalized === 'directed'
+    ) {
+      return true;
+    }
+    if (
+      normalized === 'false' ||
+      normalized === '0' ||
+      normalized === 'no' ||
+      normalized === 'undirected'
+    ) {
+      return false;
+    }
+  }
+  return defaultDirected;
+}
+
+/** Parses true/1/yes/y and false/0/no/n (case-insensitive); undefined for anything else. */
+function parseBoolean(value: string): boolean | undefined {
+  switch (value.trim().toLowerCase()) {
+    case 'true':
+    case '1':
+    case 'yes':
+    case 'y':
+      return true;
+    case 'false':
+    case '0':
+    case 'no':
+    case 'n':
+      return false;
+    default:
+      return undefined;
+  }
+}
diff --git a/modules/graph-layers/test/data/__fixtures__/graphml/basic.graphml b/modules/graph-layers/test/data/__fixtures__/graphml/basic.graphml
new file mode 100644
index 00000000..ef5a11ab
--- /dev/null
+++ b/modules/graph-layers/test/data/__fixtures__/graphml/basic.graphml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<graphml xmlns="http://graphml.graphdrawing.org/xmlns">
+  <key id="d0" for="node" attr.name="label" attr.type="string"/>
+  <key id="d1" for="node" attr.name="count" attr.type="int">
+    <default>5</default>
+  </key>
+  <key id="d2" for="edge" attr.name="weight" attr.type="double">
+    <default>1.5</default>
+  </key>
+  <key id="d3" for="all" attr.name="flag" attr.type="boolean">
+    <default>true</default>
+  </key>
+  <graph id="G" edgedefault="directed">
+    <node id="n0">
+      <data key="d0">Node Zero</data>
+      <data key="d3">false</data>
+    </node>
+    <node id="n1">
+      <data key="d0">Node One</data>
+      <data key="d1">7</data>
+      <data key="custom-text">note</data>
+    </node>
+    <edge id="e0" source="n0" target="n1">
+      <data key="d2">2.5</data>
+    </edge>
+    <edge source="n1" target="n0" directed="false">
+      <data key="d3">true</data>
+    </edge>
+  </graph>
+</graphml>
diff --git a/modules/graph-layers/test/data/__fixtures__/graphml/defaults.graphml b/modules/graph-layers/test/data/__fixtures__/graphml/defaults.graphml
new file mode 100644
index 00000000..a2c39876
--- /dev/null
+++ b/modules/graph-layers/test/data/__fixtures__/graphml/defaults.graphml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<graphml xmlns="http://graphml.graphdrawing.org/xmlns">
+  <key id="category" for="node" attr.name="category" attr.type="string">
+    <default>general</default>
+  </key>
+  <key id="weight" for="edge" attr.name="weight" attr.type="double">
+    <default>3.5</default>
+  </key>
+  <graph id="G" edgedefault="undirected">
+    <node id="a"/>
+    <node id="b"/>
+    <edge source="a" target="b"/>
+  </graph>
+</graphml>
diff --git a/modules/graph-layers/test/loaders/graphml-loader.spec.ts b/modules/graph-layers/test/loaders/graphml-loader.spec.ts
new file mode 100644
index 00000000..c2b6fb86
--- /dev/null
+++ b/modules/graph-layers/test/loaders/graphml-loader.spec.ts
@@ -0,0 +1,116 @@
+// deck.gl-community
+// SPDX-License-Identifier: MIT
+// Copyright (c) vis.gl contributors
+
+import {beforeAll, describe, expect, it} from 'vitest';
+import {loadGraphML, parseGraphML} from '../../src/loaders/graphml-loader';
+import basicGraphml from '../data/__fixtures__/graphml/basic.graphml?raw';
+import defaultsGraphml from '../data/__fixtures__/graphml/defaults.graphml?raw';
+
+beforeAll(() => {
+  globalThis.CustomEvent = Event as any;
+});
+
+describe('loadGraphML', () => {
+  let graph: ReturnType<typeof loadGraphML>;
+
+  beforeAll(() => {
+    graph = loadGraphML(basicGraphml);
+  });
+
+  it('parses node attributes from GraphML text', () => {
+    const nodeIds = Array.from(graph.getNodes(), (node) => node.getId());
+    expect(nodeIds).toEqual(expect.arrayContaining(['n0', 'n1']));
+
+    const nodeZero = graph.findNodeById('n0');
+    if (!nodeZero) {
+      throw new Error('Expected node n0 to be defined');
+    }
+    expect(nodeZero.getPropertyValue('label')).toBe('Node Zero');
+    expect(nodeZero.getPropertyValue('flag')).toBe(false);
+
+    const nodeOne = graph.findNodeById('n1');
+    if (!nodeOne) {
+      throw new Error('Expected node n1 to be defined');
+    }
+    const nodeOneCount = nodeOne.getPropertyValue('count');
+    expect(nodeOne.getPropertyValue('label')).toBe('Node One');
+    expect(nodeOneCount).toBe(7);
+    expect(typeof nodeOneCount).toBe('number');
+    expect(nodeOne.getPropertyValue('flag')).toBe(true);
+    expect(nodeOne.getPropertyValue('custom-text')).toBe('note');
+  });
+
+  it('parses edges and typed defaults', () => {
+    const edges = Array.from(graph.getEdges());
+    expect(edges).toHaveLength(2);
+
+    const [firstEdge, secondEdge] = edges;
+    expect(firstEdge.getId()).toBe('e0');
+    expect(firstEdge.isDirected()).toBe(true);
+    expect(firstEdge.getPropertyValue('weight')).toBe(2.5);
+    expect(typeof firstEdge.getPropertyValue('weight')).toBe('number');
+    expect(firstEdge.getPropertyValue('flag')).toBe(true);
+
+    expect(secondEdge.getId()).toBe('edge-1');
+    expect(secondEdge.isDirected()).toBe(false);
+    expect(secondEdge.getPropertyValue('weight')).toBe(1.5);
+    expect(secondEdge.getPropertyValue('flag')).toBe(true);
+  });
+
+  it('accepts ArrayBuffer inputs and applies default values', () => {
+    const encoder = new TextEncoder();
+    const buffer = encoder.encode(defaultsGraphml);
+    const arrayBuffer = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
+
+    const bufferedGraph = loadGraphML(arrayBuffer);
+    const nodes = Array.from(bufferedGraph.getNodes());
+    expect(nodes).toHaveLength(2);
+    expect(nodes.every((node) => node.getPropertyValue('category') === 'general')).toBe(true);
+
+    const [edge] = Array.from(bufferedGraph.getEdges());
+    expect(edge.isDirected()).toBe(false);
+    expect(edge.getPropertyValue('weight')).toBe(3.5);
+  });
+});
+
+describe('parseGraphML', () => {
+  it('returns GraphData compatible objects', () => {
+    const data = parseGraphML(basicGraphml);
+
+    expect(data.type).toBe('graph-data');
+    expect(data.nodes).toHaveLength(2);
+    expect(data.edges).toHaveLength(2);
+
+    const firstEdge = data.edges?.[0];
+    if (!firstEdge) {
+      throw new Error('Expected first edge to be defined');
+    }
+    expect(firstEdge.directed).toBe(true);
+    expect(firstEdge.attributes?.weight).toBe(2.5);
+    expect(firstEdge.attributes?.flag).toBe(true);
+  });
+
+  it('reads documents whose elements carry a namespace prefix', () => {
+    const data = parseGraphML(
+      '<g:graphml xmlns:g="http://graphml.graphdrawing.org/xmlns">' +
+        '<g:graph edgedefault="undirected">' +
+        '<g:node id="a"/><g:node id="b"/><g:edge source="a" target="b"/>' +
+        '</g:graph></g:graphml>'
+    );
+
+    expect(data.nodes).toHaveLength(2);
+    expect(data.edges).toHaveLength(1);
+    expect(data.edges?.[0]?.directed).toBe(false);
+  });
+
+  it('keeps values that do not match the declared type as strings', () => {
+    const data = parseGraphML(
+      '<graphml><key id="k" for="node" attr.name="count" attr.type="int"/>' +
+        '<graph edgedefault="directed"><node id="a"><data key="k">12abc</data></node></graph>' +
+        '</graphml>'
+    );
+
+    expect(data.nodes?.[0]?.attributes?.count).toBe('12abc');
+  });
+});
diff --git a/yarn.lock b/yarn.lock
index 5715bac8..462cbb8e 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1462,6 +1462,7 @@ __metadata:
     d3-force: "npm:^3.0.0"
     d3-format: "npm:^3.1.0"
     d3-scale: "npm:^4.0.2"
+    fast-xml-parser: "npm:^4.2.5"
     global: "npm:^4.4.0"
     lodash.isequal: "npm:^4.5.0"
     lodash.pick: "npm:^4.4.0"