Commit 67a9e4d

Add initial attempt at pagerank with hacky tests (#1)
* Add initial attempt at pagerank with hacky tests
* Install networkx too
* Fix failing test; maybe faster/better?
* Even better
* Split pagerank into two functions: grblas-native and networkx-facing. Also, optimize if adjacency matrix is iso-valued. I would bring this implementation to a benchmarking shootout!
* Change how we convert NetworkX dicts to vectors. Also, sparsify vectors. May be a decent idea? Not sure. It probably doesn't matter most of the time, but I guess there's a chance it can make the matrix-vector multiply faster for some inputs. We don't drop 0s from the input matrix, because that would be expensive.
* Clean up
* Don't be cute; don't use masks, because they're bad for benchmarks. Also, add basic benchmark script.
* Update to use latest grblas; also, add verify option to bench script.
* Show grid of absolute differences between benchmark results
1 parent c544c79 commit 67a9e4d
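
The networkx-facing `pagerank` added here is intended as a drop-in for `nx.pagerank` (see the README and test changes below). A minimal usage sketch, with a made-up toy graph purely for illustration:

```python
import networkx as nx

from graphblas_algorithms import pagerank

# Hypothetical toy graph; any NetworkX graph should work the same way.
G = nx.DiGraph([(0, 1), (1, 2), (2, 0), (2, 3)])

# Same signature as nx.pagerank; returns a dict mapping each node to its score.
scores = pagerank(G, alpha=0.85, tol=1e-06)
print(scores)
```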

9 files changed: +440 -7 lines changed

.github/workflows/test.yml  (+1 -1)

@@ -30,7 +30,7 @@ jobs:
           activate-environment: testing
       - name: Install dependencies
         run: |
-          conda install -c conda-forge grblas pytest coverage black flake8 coveralls
+          conda install -c conda-forge grblas networkx scipy pytest coverage black flake8 coveralls
           pip install -e .
       - name: Style checks
         run: |

README.md  (+9 -4)

@@ -1,13 +1,18 @@
 # **GraphBLAS Algorithms**
 
-[![conda-forge](https://img.shields.io/conda/vn/conda-forge/graphblas-algorithms.svg)](https://anaconda.org/conda-forge/graphblas-algorithms)
 [![pypi](https://img.shields.io/pypi/v/graphblas-algorithms.svg)](https://pypi.python.org/pypi/graphblas-algorithms/)
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/metagraph-dev/graphblas-algorithms/blob/main/LICENSE)
 [![Tests](https://github.com/metagraph-dev/graphblas-algorithms/workflows/Tests/badge.svg?branch=main)](https://github.com/metagraph-dev/graphblas-algorithms/actions)
-[![Docs](https://readthedocs.org/projects/graphblas-algorithms/badge/?version=latest)](https://graphblas-algorithms.readthedocs.io/en/latest/)
 [![Coverage](https://coveralls.io/repos/metagraph-dev/graphblas-algorithms/badge.svg?branch=main)](https://coveralls.io/r/metagraph-dev/graphblas-algorithms)
 [![Code style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+<!--- [![conda-forge](https://img.shields.io/conda/vn/conda-forge/graphblas-algorithms.svg)](https://anaconda.org/conda-forge/graphblas-algorithms) --->
+<!--- [![Docs](https://readthedocs.org/projects/graphblas-algorithms/badge/?version=latest)](https://graphblas-algorithms.readthedocs.io/en/latest/) --->
 
-GraphBLAS algorithms written in Python with [`grblas`](https://github.com/metagraph-dev/grblas).
+GraphBLAS algorithms written in Python with [`grblas`](https://github.com/metagraph-dev/grblas). We are trying to target the NetworkX API algorithms where possible.
 
-This is a work in progress. Stay tuned!
+### Installation
+```
+pip install graphblas-algorithms
+```
+
+This is a work in progress. Stay tuned (or come help 😃)!

graphblas_algorithms/__init__.py  (+1)

@@ -1,3 +1,4 @@
 from . import _version
+from .link_analysis import pagerank  # noqa
 
 __version__ = _version.get_versions()["version"]

graphblas_algorithms/conftest.py  (+1)

@@ -0,0 +1 @@
+from networkx.conftest import *  # noqa

graphblas_algorithms/link_analysis.py  (+155)

@@ -0,0 +1,155 @@

from collections import OrderedDict
from warnings import warn

import grblas as gb
import networkx as nx
from grblas import Vector, binary, unary
from grblas.semiring import plus_first, plus_times


def pagerank_core(
    A,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1e-06,
    nstart=None,
    dangling=None,
    row_degrees=None,
    name="pagerank",
):
    N = A.nrows
    if A.nvals == 0:
        return Vector.new(float, N, name=name)

    # Initial vector
    x = Vector.new(float, N, name="x")
    if nstart is None:
        x[:] = 1.0 / N
    else:
        denom = nstart.reduce(allow_empty=False).value
        if denom == 0:
            raise ZeroDivisionError()
        x << nstart / denom

    # Personalization vector or scalar
    if personalization is None:
        p = 1.0 / N
    else:
        denom = personalization.reduce(allow_empty=False).value
        if denom == 0:
            raise ZeroDivisionError()
        p = (personalization / denom).new(name="p")

    # Inverse of row_degrees
    # Fold alpha constant into S
    if row_degrees is None:
        S = A.reduce_rowwise().new(float, name="S")
        S << alpha / S
    else:
        S = (alpha / row_degrees).new(name="S")

    if A.ss.is_iso:
        # Fold iso-value of A into S
        # This lets us use the plus_first semiring, which is faster
        iso_value = A.ss.iso_value
        if iso_value != 1:
            S *= iso_value
        semiring = plus_first[float]
    else:
        semiring = plus_times[float]

    is_dangling = S.nvals < N
    if is_dangling:
        dangling_mask = Vector.new(float, N, name="dangling_mask")
        dangling_mask(mask=~S.S) << 1.0
        # Fold alpha constant into dangling_weights (or dangling_mask)
        if dangling is not None:
            dangling_weights = (alpha / dangling.reduce(allow_empty=False).value * dangling).new(
                name="dangling_weights"
            )
        elif personalization is None:
            # Fast case (and common case); is iso-valued
            dangling_mask(mask=dangling_mask.S) << alpha * p
        else:
            dangling_weights = (alpha * p).new(name="dangling_weights")

    # Fold constant into p
    p *= 1 - alpha

    # Power iteration: make up to max_iter iterations
    xprev = Vector.new(float, N, name="x_prev")
    w = Vector.new(float, N, name="w")
    for _ in range(max_iter):
        xprev, x = x, xprev

        # x << alpha * ((xprev * S) @ A + "dangling_weights") + (1 - alpha) * p
        x << p
        if is_dangling:
            if dangling is None and personalization is None:
                # Fast case: add a scalar; x is still iso-valued (b/c p is also scalar)
                x += xprev @ dangling_mask
            else:
                # Add a vector
                x += plus_first(xprev @ dangling_mask) * dangling_weights
        w << xprev * S
        x += semiring(w @ A)  # plus_first if A.ss.is_iso else plus_times

        # Check convergence, l1 norm: err = sum(abs(xprev - x))
        xprev << binary.minus(xprev | x, require_monoid=False)
        xprev << unary.abs(xprev)
        err = xprev.reduce().value
        if err < N * tol:
            x.name = name
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)


def pagerank(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1e-06,
    nstart=None,
    weight="weight",
    dangling=None,
):
    warn("", DeprecationWarning, stacklevel=2)
    N = len(G)
    if N == 0:
        return {}
    node_ids = OrderedDict((k, i) for i, k in enumerate(G))
    A = gb.io.from_networkx(G, nodelist=node_ids, weight=weight, dtype=float)

    x = p = dangling_weights = None
    # Initial vector (we'll normalize later)
    if nstart is not None:
        indices, values = zip(*((node_ids[key], val) for key, val in nstart.items()))
        x = Vector.from_values(indices, values, size=N, dtype=float, name="nstart")
    # Personalization vector (we'll normalize later)
    if personalization is not None:
        indices, values = zip(*((node_ids[key], val) for key, val in personalization.items()))
        p = Vector.from_values(indices, values, size=N, dtype=float, name="personalization")
    # Dangling nodes (we'll normalize later)
    row_degrees = A.reduce_rowwise().new(name="row_degrees")
    if dangling is not None:
        if row_degrees.nvals < N:  # is_dangling
            indices, values = zip(*((node_ids[key], val) for key, val in dangling.items()))
            dangling_weights = Vector.from_values(
                indices, values, size=N, dtype=float, name="dangling"
            )
    result = pagerank_core(
        A,
        alpha=alpha,
        personalization=p,
        max_iter=max_iter,
        tol=tol,
        nstart=x,
        dangling=dangling_weights,
        row_degrees=row_degrees,
    )
    if result.nvals != N:
        # Not likely, but fill with 0 just in case
        result(mask=~result.S) << 0
    return dict(zip(node_ids, result.to_values()[1]))
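
For reference, the loop above implements the commented update `x << alpha * ((xprev * S) @ A + "dangling_weights") + (1 - alpha) * p`; in standard PageRank notation (restating what the code already does):

$$x_{k+1} = \alpha\, x_k D^{-1} A + \alpha\, d_k + (1-\alpha)\, p, \qquad \text{stop when } \lVert x_{k+1} - x_k \rVert_1 < N \cdot \mathrm{tol},$$

where $D^{-1}$ is the inverse out-degree (folded into `S` along with $\alpha$), $d_k$ redistributes the mass $x_k$ places on dangling rows (`dangling_mask` / `dangling_weights`), and $p$ is the uniform or personalization teleport vector (with $1-\alpha$ folded in before the loop).

The iso-valued fast path can be sketched with a made-up example (the matrix and vector below are hypothetical; the point is that `first(w_k, a_kj)` returns `w_k`, so when every stored value of `A` is 1 the `plus_first` semiring matches `plus_times` while skipping the multiplications):

```python
import grblas as gb
from grblas.semiring import plus_first, plus_times

# Hypothetical unweighted adjacency matrix: every stored value is 1.0.
A = gb.Matrix.from_values(
    [0, 0, 1, 2], [1, 2, 2, 0], [1.0, 1.0, 1.0, 1.0], nrows=3, ncols=3
)
w = gb.Vector.from_values([0, 1, 2], [0.2, 0.3, 0.5])

# plus_first ignores A's stored values; with an all-ones A it agrees with plus_times.
r_first = plus_first[float](w @ A).new()
r_times = plus_times[float](w @ A).new()
assert r_first.isclose(r_times)
```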

graphblas_algorithms/tests/__init__.py

Whitespace-only changes.
+21

@@ -0,0 +1,21 @@

import inspect

import networkx as nx

from graphblas_algorithms import pagerank

nx_pagerank = nx.pagerank
nx_pagerank_scipy = nx.pagerank_scipy

nx.pagerank = pagerank
nx.pagerank_scipy = pagerank
nx.algorithms.link_analysis.pagerank_alg.pagerank_scipy = pagerank


def test_signatures():
    nx_sig = inspect.signature(nx_pagerank)
    sig = inspect.signature(pagerank)
    assert nx_sig == sig


from networkx.algorithms.link_analysis.tests.test_pagerank import *  # isort:skip
