Commit 32cafdf

Merge branch 'main' into snyk-fix-298f1ab883fce6d984a61851ecb67bfc
2 parents: 85f85ad + 8853bac

File tree: 3 files changed, +13 −10 lines

    pytextrank/base.py
    requirements-dev.txt
    tests/test_base.py

pytextrank/base.py

Lines changed: 7 additions & 5 deletions
@@ -309,7 +309,7 @@ def __init__ (
         # effectively, performs the same work as the `reset()` method;
         # called explicitly here for the sake of type annotations
         self.elapsed_time: float = 0.0
-        self.lemma_graph: nx.DiGraph = nx.DiGraph()
+        self.lemma_graph: nx.Graph = nx.Graph()
         self.phrases: typing.List[Phrase] = []
         self.ranks: typing.Dict[Lemma, float] = {}
         self.seen_lemma: typing.Dict[Lemma, typing.Set[int]] = OrderedDict()
@@ -323,7 +323,7 @@ def reset (
 removing any pre-existing state.
         """
         self.elapsed_time = 0.0
-        self.lemma_graph = nx.DiGraph()
+        self.lemma_graph = nx.Graph()
         self.phrases = []
         self.ranks = {}
         self.seen_lemma = OrderedDict()
@@ -400,15 +400,15 @@ def get_personalization ( # pylint: disable=R0201

     def _construct_graph (
         self
-        ) -> nx.DiGraph:
+        ) -> nx.Graph:
         """
 Construct the
 [*lemma graph*](https://derwen.ai/docs/ptr/glossary/#lemma-graph).

     returns:
 a directed graph representing the lemma graph
         """
-        g = nx.DiGraph()
+        g = nx.Graph()

         # add nodes made of Lemma(lemma, pos)
         g.add_nodes_from(self.node_list)
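
These hunks switch the lemma graph from nx.DiGraph to nx.Graph, so the co-occurrence relation between lemmas is now treated as undirected. A minimal sketch, not part of this commit and using illustrative node names, of what that changes in networkx terms:

# minimal sketch, not from this commit: edge symmetry in DiGraph vs. Graph
import networkx as nx

directed = nx.DiGraph()
directed.add_edge("lemma_a", "lemma_b", weight=1.0)
print(directed.has_edge("lemma_b", "lemma_a"))    # False: direction matters

undirected = nx.Graph()
undirected.add_edge("lemma_a", "lemma_b", weight=1.0)
print(undirected.has_edge("lemma_b", "lemma_a"))  # True: one symmetric edge

# PageRank accepts either graph type; on the undirected graph each
# co-occurrence edge contributes to the rank of both endpoints symmetrically
print(nx.pagerank(undirected))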
@@ -571,6 +571,8 @@ def _calc_discounted_normalised_rank (
     returns:
 normalized rank metric
         """
+        if len(span) < 1:
+            return 0.0
         non_lemma = len([tok for tok in span if tok.pos_ not in self.pos_kept])
         non_lemma_discount = len(span) / (len(span) + (2.0 * non_lemma) + 1.0)

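The guard added here short-circuits zero-length spans, returning 0.0 before the discount calculation (and whatever follows it) runs on an empty phrase. A standalone sketch, not from the repo, using a simplified stand-in for the discount formula shown above:

# hedged sketch, not from the repo: an empty span returns 0.0 via the new
# guard instead of reaching the (simplified) discount formula below
from typing import List, Set

def discounted_rank (pos_tags: List[str], pos_kept: Set[str]) -> float:
    if len(pos_tags) < 1:
        return 0.0
    non_lemma = len([pos for pos in pos_tags if pos not in pos_kept])
    return len(pos_tags) / (len(pos_tags) + (2.0 * non_lemma) + 1.0)

print(discounted_rank([], {"NOUN"}))                       # 0.0 via the guard
print(discounted_rank(["NOUN", "ADP", "NOUN"], {"NOUN"}))  # 0.5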
@@ -877,7 +879,7 @@ def write_dot (
     path:
 path for the output file; defaults to `"graph.dot"`
         """
-        dot = graphviz.Digraph()
+        dot = graphviz.Graph()

         for lemma in self.lemma_graph.nodes():
             rank = self.ranks[lemma]
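
write_dot() now builds a graphviz.Graph rather than a graphviz.Digraph, keeping the DOT export consistent with the undirected lemma graph. A hedged sketch, not from the repo, of what that means for the generated DOT source (node names and labels are illustrative):

# hedged sketch, not from the repo: undirected DOT output from graphviz.Graph
import graphviz

dot = graphviz.Graph()
dot.node("lemma_a", label="lemma_a (0.42)")
dot.node("lemma_b", label="lemma_b (0.17)")
dot.edge("lemma_a", "lemma_b")

print(dot.source)  # emits "graph { ... lemma_a -- lemma_b }";
                   # graphviz.Digraph() would emit "digraph" with "->" edges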

requirements-dev.txt

Lines changed: 4 additions & 3 deletions
@@ -5,6 +5,7 @@ coverage
 flask
 grayskull
 jupyterlab >= 3.1.4
+jupyter-server >= 2.11.2 # not directly required, pinned by Snyk to avoid a vulnerability
 mistune
 mkdocs-git-revision-date-plugin
 mkdocs-material
@@ -22,6 +23,6 @@ pymdown-extensions
 selenium
 setuptools >= 65.5.1
 twine
-wheel >= 0.38.0
-tornado>=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
-werkzeug>=3.0.1 # not directly required, pinned by Snyk to avoid a vulnerability
+tornado >= 6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
+werkzeug >= 3.0.1 # not directly required, pinned by Snyk to avoid a vulnerability
+wheel >= 0.38.0
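
The Snyk pins above set version floors on transitive dependencies (jupyter-server, tornado, werkzeug) rather than on packages pytextrank imports directly. A hedged sketch, not from the repo, for reporting the installed versions against those declared floors after installing requirements-dev.txt:

# hedged sketch, not from the repo: print installed versions of the pinned
# transitive dependencies so they can be compared against the declared floors
from importlib.metadata import version, PackageNotFoundError

floors = {
    "jupyter-server": "2.11.2",
    "tornado": "6.3.3",
    "werkzeug": "3.0.1",
    "wheel": "0.38.0",
}

for name, floor in floors.items():
    try:
        print(f"{name}: installed {version(name)}, declared floor >= {floor}")
    except PackageNotFoundError:
        print(f"{name}: not installed")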

tests/test_base.py

Lines changed: 2 additions & 2 deletions
@@ -154,13 +154,13 @@ def test_stop_words ():
         for phrase in doc._.phrases[:5]
     ]

-    assert "words" in phrases
+    assert "sentences" in phrases

     # add `"word": ["NOUN"]` to the *stop words*, to remove instances
     # of `"word"` or `"words"` then see how the ranked phrases differ?

     nlp2 = spacy.load("en_core_web_sm")
-    nlp2.add_pipe("textrank", config={ "stopwords": { "word": ["NOUN"] } })
+    nlp2.add_pipe("textrank", config={ "stopwords": { "sentence": ["NOUN"] } })

     with open("dat/gen.txt", "r") as f:
         doc = nlp2(f.read())
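
The test now asserts on the phrase "sentences" and stops the lemma "sentence" when tagged as a NOUN. A hedged usage sketch, not from the test file, exercising the same stopwords config on an illustrative text:

# hedged usage sketch, not from the test: the "stopwords" config drops
# phrases built on the lemma "sentence" when it appears as a NOUN
import spacy
import pytextrank  # noqa: F401  registers the "textrank" pipeline factory

nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("textrank", config={ "stopwords": { "sentence": ["NOUN"] } })

doc = nlp("Long sentences about short sentences: which sentence ranks highest?")

for phrase in doc._.phrases[:5]:
    print(round(phrase.rank, 4), phrase.text)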
