From f2d7c2ad525023c9a0ecb2c40fec546300dab8e3 Mon Sep 17 00:00:00 2001 From: AdityaChaubeyIITM Date: Sat, 22 Mar 2025 12:44:41 +0530 Subject: [PATCH 1/5] Added Interface for Netzschleuder .gml.zst files --- src/igraph/__init__.py | 6 ++++ src/igraph/io/repositories.py | 67 +++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 src/igraph/io/repositories.py diff --git a/src/igraph/__init__.py b/src/igraph/__init__.py index 7f0e328b7..7ec62bc4f 100644 --- a/src/igraph/__init__.py +++ b/src/igraph/__init__.py @@ -213,6 +213,9 @@ _construct_graph_from_graph_tool, _export_graph_to_graph_tool, ) +from igraph.io.repositories import( + _construct_graph_from_Netzschleuder, +) from igraph.io.random import ( _construct_random_geometric_graph, ) @@ -462,6 +465,9 @@ def __init__(self, *args, **kwds): from_graph_tool = classmethod(_construct_graph_from_graph_tool) to_graph_tool = _export_graph_to_graph_tool + # Repositories + from_Netzschleuder = classmethod(_construct_graph_from_Netzschleuder) + # Files Read_DIMACS = classmethod(_construct_graph_from_dimacs_file) write_dimacs = _write_graph_to_dimacs_file diff --git a/src/igraph/io/repositories.py b/src/igraph/io/repositories.py new file mode 100644 index 000000000..b60b8aaa7 --- /dev/null +++ b/src/igraph/io/repositories.py @@ -0,0 +1,67 @@ +import os +import tempfile +import requests +import zstandard as zstd +import igraph as ig + +def _construct_graph_from_Netzschleuder(cls=ig.Graph, name: str = None, net: str = None) -> ig.Graph: + """ + Downloads, decompresses, and loads a graph from Netzschleuder into an igraph.Graph. + + Parameters: + cls (igraph.Graph, optional): The graph class to use (default: ig.Graph). + name (str): The dataset name (e.g., "bison"). + net (str, optional): The specific network file (defaults to `name`). + + Returns: + igraph.Graph: The loaded graph. + + Raises: + ValueError: If the dataset or network file does not exist. 
+ RuntimeError: If there are issues with download or decompression. + """ + if name is None: + raise ValueError("Dataset name must be provided.") + + base_url = "https://networks.skewed.de/net" + net = net or name # Default net name + + # Check dataset existence + dataset_url = f"{base_url}/{name}" + if requests.head(dataset_url, timeout=5).status_code != 200: + raise ValueError(f"Dataset '{name}' does not exist at {dataset_url}.") + + # Check network file existence + file_url = f"{dataset_url}/files/{net}.gml.zst" + if requests.head(file_url, timeout=5).status_code != 200: + raise ValueError(f"Network file '{net}.gml.zst' does not exist at {file_url}.") + + try: + # Download the compressed file + with tempfile.NamedTemporaryFile(delete=False, suffix=".zst") as tmp_zst_file: + response = requests.get(file_url, stream=True, timeout=10) + response.raise_for_status() + tmp_zst_file.write(response.content) + tmp_zst_path = tmp_zst_file.name + + # Decompress the file + dctx = zstd.ZstdDecompressor() + with tempfile.NamedTemporaryFile(delete=False, suffix=".gml") as tmp_gml_file: + with open(tmp_zst_path, "rb") as compressed: + dctx.copy_stream(compressed, tmp_gml_file) + tmp_gml_path = tmp_gml_file.name + + # Load graph using the given class + graph = cls.Read_GML(tmp_gml_path) + + except requests.RequestException as e: + raise RuntimeError(f"Network error: {e}") + except zstd.ZstdError as e: + raise RuntimeError(f"Decompression error: {e}") + except Exception as e: + raise RuntimeError(f"Error processing file: {e}") + finally: + os.remove(tmp_zst_path) + os.remove(tmp_gml_path) + + return graph \ No newline at end of file From 22e10eb557ddc5ad1d3bf60e9c4560a8385a1cc7 Mon Sep 17 00:00:00 2001 From: AdityaHere Date: Sat, 22 Mar 2025 16:29:07 +0530 Subject: [PATCH 2/5] Update repositories.py Changed the API call to "urllib" instead of "requests" to avoid import errors in the build --- src/igraph/io/repositories.py | 30 +++++++++++++++++------------- 1 file changed, 17 
insertions(+), 13 deletions(-) diff --git a/src/igraph/io/repositories.py b/src/igraph/io/repositories.py index b60b8aaa7..07154160a 100644 --- a/src/igraph/io/repositories.py +++ b/src/igraph/io/repositories.py @@ -1,21 +1,18 @@ import os import tempfile -import requests +import urllib.request import zstandard as zstd import igraph as ig def _construct_graph_from_Netzschleuder(cls=ig.Graph, name: str = None, net: str = None) -> ig.Graph: """ - Downloads, decompresses, and loads a graph from Netzschleuder into an igraph.Graph. - + Downloads, decompresses, and loads a graph from a .gml.zst file from Netzschleuder into an igraph.Graph. Parameters: cls (igraph.Graph, optional): The graph class to use (default: ig.Graph). name (str): The dataset name (e.g., "bison"). net (str, optional): The specific network file (defaults to `name`). - Returns: igraph.Graph: The loaded graph. - Raises: ValueError: If the dataset or network file does not exist. RuntimeError: If there are issues with download or decompression. 
@@ -24,24 +21,31 @@ def _construct_graph_from_Netzschleuder(cls=ig.Graph, name: str = None, net: str raise ValueError("Dataset name must be provided.") base_url = "https://networks.skewed.de/net" - net = net or name # Default net name + net = net or name # Check dataset existence dataset_url = f"{base_url}/{name}" - if requests.head(dataset_url, timeout=5).status_code != 200: + try: + with urllib.request.urlopen(dataset_url) as response: + if response.status != 200: + raise ValueError(f"Dataset '{name}' does not exist at {dataset_url}.") + except: raise ValueError(f"Dataset '{name}' does not exist at {dataset_url}.") # Check network file existence file_url = f"{dataset_url}/files/{net}.gml.zst" - if requests.head(file_url, timeout=5).status_code != 200: + try: + with urllib.request.urlopen(file_url) as response: + if response.status != 200: + raise ValueError(f"Network file '{net}.gml.zst' does not exist at {file_url}.") + except: raise ValueError(f"Network file '{net}.gml.zst' does not exist at {file_url}.") try: # Download the compressed file with tempfile.NamedTemporaryFile(delete=False, suffix=".zst") as tmp_zst_file: - response = requests.get(file_url, stream=True, timeout=10) - response.raise_for_status() - tmp_zst_file.write(response.content) + with urllib.request.urlopen(file_url) as response: + tmp_zst_file.write(response.read()) tmp_zst_path = tmp_zst_file.name # Decompress the file @@ -54,7 +58,7 @@ def _construct_graph_from_Netzschleuder(cls=ig.Graph, name: str = None, net: str # Load graph using the given class graph = cls.Read_GML(tmp_gml_path) - except requests.RequestException as e: + except urllib.error.URLError as e: raise RuntimeError(f"Network error: {e}") except zstd.ZstdError as e: raise RuntimeError(f"Decompression error: {e}") @@ -64,4 +68,4 @@ def _construct_graph_from_Netzschleuder(cls=ig.Graph, name: str = None, net: str os.remove(tmp_zst_path) os.remove(tmp_gml_path) - return graph \ No newline at end of file + return graph From 
732ace38a38677334657f604dd427624e05acb75 Mon Sep 17 00:00:00 2001 From: AdityaChaubeyIITM Date: Sat, 22 Mar 2025 18:34:40 +0530 Subject: [PATCH 3/5] Updated requirements.txt Added zstandard in requirements --- doc/source/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/requirements.txt b/doc/source/requirements.txt index 9044d840b..99eb1940a 100644 --- a/doc/source/requirements.txt +++ b/doc/source/requirements.txt @@ -11,3 +11,4 @@ numpy scipy pandas matplotlib +zstandard \ No newline at end of file From f503e772209ae1f507f5263b07aec6d2ec0f144d Mon Sep 17 00:00:00 2001 From: AdityaHere Date: Fri, 28 Mar 2025 02:15:06 +0530 Subject: [PATCH 4/5] Update repositories.py --- src/igraph/io/repositories.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/igraph/io/repositories.py b/src/igraph/io/repositories.py index 07154160a..3c18ddb6a 100644 --- a/src/igraph/io/repositories.py +++ b/src/igraph/io/repositories.py @@ -1,7 +1,11 @@ import os import tempfile import urllib.request -import zstandard as zstd +try: + import zstandard as zstd +except ImportError: + os.system('pip install zstandard') + import zstandard as zstd import igraph as ig def _construct_graph_from_Netzschleuder(cls=ig.Graph, name: str = None, net: str = None) -> ig.Graph: From f95b283a79a193b88e09aacfc7342ee139eb7666 Mon Sep 17 00:00:00 2001 From: AdityaHere Date: Sat, 29 Mar 2025 23:38:39 +0530 Subject: [PATCH 5/5] Update repositories.py --- src/igraph/io/repositories.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/igraph/io/repositories.py b/src/igraph/io/repositories.py index 3c18ddb6a..053b3219f 100644 --- a/src/igraph/io/repositories.py +++ b/src/igraph/io/repositories.py @@ -6,9 +6,8 @@ except ImportError: os.system('pip install zstandard') import zstandard as zstd -import igraph as ig -def _construct_graph_from_Netzschleuder(cls=ig.Graph, name: str = None, net: str = None) -> ig.Graph: +def 
_construct_graph_from_Netzschleuder(cls, name: str = None, net: str = None): """ Downloads, decompresses, and loads a graph from a .gml.zst file from Netzschleuder into an igraph.Graph. Parameters: @@ -35,7 +34,9 @@ def _construct_graph_from_Netzschleuder(cls=ig.Graph, name: str = None, net: str raise ValueError(f"Dataset '{name}' does not exist at {dataset_url}.") except: raise ValueError(f"Dataset '{name}' does not exist at {dataset_url}.") - + + from igraph import Graph + # Check network file existence file_url = f"{dataset_url}/files/{net}.gml.zst" try: