sciunto-org · rbirke · Oct 28, 2024 · Oct 28, 2024 · Oct 28, 2024 · Oct 28, 2024
diff --git a/bibtexparser/__init__.py b/bibtexparser/__init__.py
@@ -3,6 +3,7 @@
 import bibtexparser.model
 from bibtexparser.entrypoint import parse_file
 from bibtexparser.entrypoint import parse_string
+from bibtexparser.entrypoint import parse_url
 from bibtexparser.entrypoint import write_file
 from bibtexparser.entrypoint import write_string
 from bibtexparser.library import Library

diff --git a/bibtexparser/entrypoint.py b/bibtexparser/entrypoint.py
@@ -132,7 +132,34 @@ def parse_file(
             bibtex_str, parse_stack=parse_stack, append_middleware=append_middleware
         )
 
+def parse_url(
+    url: str,
+    parse_stack: Optional[Iterable[Middleware]] = None,
+    append_middleware: Optional[Iterable[Middleware]] = None,
+    encoding: str = "UTF-8",
+) -> Library:
+    """Download a BibTeX document from a URL and parse it.
+
+    :param url: Location of the BibTeX file; handed to ``urllib.request.urlopen``.
+    :param parse_stack:
+        List of middleware to apply to the database after splitting.
+        If ``None`` (default), a default stack will be used providing simple standard functionality.
 
+    :param append_middleware:
+        List of middleware to append to the default stack
+        (ignored if a not-``None`` parse_stack is passed).
+
+    :param encoding: Encoding used to decode the downloaded bytes (default ``"UTF-8"``).
+    :return: Library: Parsed BibTeX library
+    """
+    import urllib.request
+
+    with urllib.request.urlopen(url) as response:
+        raw_bytes = response.read()
+        return parse_string(
+            raw_bytes.decode(encoding), parse_stack=parse_stack, append_middleware=append_middleware
+        )
+
 def write_file(
     file: Union[str, TextIO],
     library: Library,

diff --git a/bibtexparser/library.py b/bibtexparser/library.py
@@ -6,11 +6,13 @@
 from .model import DuplicateBlockKeyBlock
 from .model import Entry
 from .model import ExplicitComment
+from .model import Field
 from .model import ImplicitComment
 from .model import ParsingFailedBlock
 from .model import Preamble
 from .model import String
 
+
 # TODO Use functools.lru_cache for library properties (which create lists when called)
 
 
@@ -162,6 +164,11 @@ def failed_blocks(self) -> List[ParsingFailedBlock]:
         """All blocks that could not be parsed, preserving order of insertion."""
         return [b for b in self._blocks if isinstance(b, ParsingFailedBlock)]
 
+    @property
+    def duplicate_blocks(self) -> List[DuplicateBlockKeyBlock]:
+        """All duplicate-key blocks in the library, preserving order of insertion."""
+        return [block for block in self._blocks if isinstance(block, DuplicateBlockKeyBlock)]
+
     @property
     def strings(self) -> List[String]:
         """All @string blocks in the library, preserving order of insertion."""
@@ -195,3 +202,44 @@ def comments(self) -> List[Union[ExplicitComment, ImplicitComment]]:
         return [
             block for block in self._blocks if isinstance(block, (ExplicitComment, ImplicitComment))
         ]
+
+    def filter(
+        self,
+        filter: Dict,
+        case_sensitive: bool = False,
+    ) -> List[Entry]:
+        """Return the entries whose fields match every criterion in ``filter``.
+
+        :param filter: Maps a field key to one accepted value or to a list/set of
+            accepted values. The dict passed in is left unmodified.
+        :param case_sensitive: If ``False`` (default), strings are lower-cased on
+            both sides before comparison.
+        :return: The entries, in block order, that contain every key of ``filter``
+            with at least one value in common with the accepted values.
+        """
+
+        def _as_set(value) -> set:
+            # Scalars become one-element sets; collections are taken item by item.
+            items = value if isinstance(value, (list, set, frozenset)) else [value]
+            if case_sensitive:
+                return set(items)
+            return {x.lower() if isinstance(x, str) else x for x in items}
+
+        # Normalise into a private mapping so the caller's dict stays untouched;
+        # this also lower-cases values that were already passed as a set.
+        wanted = {key: _as_set(value) for key, value in filter.items()}
+
+        entries = []
+        for block in self._blocks:
+            if not isinstance(block, Entry):
+                continue
+            fields = block.fields_dict
+            if all(
+                key in fields
+                and isinstance(fields[key], Field)
+                and not _as_set(fields[key].value).isdisjoint(accepted)
+                for key, accepted in wanted.items()
+            ):
+                entries.append(block)
+
+        return entries
diff --git a/bibtexparser/middlewares/__init__.py b/bibtexparser/middlewares/__init__.py
@@ -4,6 +4,7 @@
 from bibtexparser.middlewares.interpolate import ResolveStringReferencesMiddleware
 from bibtexparser.middlewares.latex_encoding import LatexDecodingMiddleware
 from bibtexparser.middlewares.latex_encoding import LatexEncodingMiddleware
+from bibtexparser.middlewares.lists import SeparateCSVLists
 from bibtexparser.middlewares.middleware import BlockMiddleware
 from bibtexparser.middlewares.middleware import LibraryMiddleware
 from bibtexparser.middlewares.month import MonthAbbreviationMiddleware
@@ -15,6 +16,7 @@
 from bibtexparser.middlewares.names import SeparateCoAuthors
 from bibtexparser.middlewares.names import SplitNameParts
 from bibtexparser.middlewares.sorting_blocks import SortBlocksByTypeAndKeyMiddleware
+from bibtexparser.middlewares.sorting_blocks import  SortBlocksByYearMonthDayMiddleware
 from bibtexparser.middlewares.sorting_entry_fields import SortFieldsAlphabeticallyMiddleware
 from bibtexparser.middlewares.sorting_entry_fields import SortFieldsCustomMiddleware
 

diff --git a/bibtexparser/middlewares/lists.py b/bibtexparser/middlewares/lists.py
@@ -0,0 +1,61 @@
+import abc
+from typing import List, Literal, Tuple
+
+from bibtexparser.model import Block, Entry, Field
+
+from .middleware import BlockMiddleware
+
+class _ListTransformerMiddleware(BlockMiddleware, abc.ABC):
+    """Internal utility class - superclass for all list-transforming middlewares.
+
+    :param allow_inplace_modification: See corresponding property.
+    :param field_names: The keys of the fields whose values this middleware transforms."""
+
+    def __init__(
+        self,
+        allow_inplace_modification: bool = True,
+        field_names: Tuple[str, ...] = (),
+    ):
+        super().__init__(
+            allow_inplace_modification=allow_inplace_modification,
+            allow_parallel_execution=True,
+        )
+        # A bare string would turn the ``in`` check below into a substring test; wrap it.
+        self._field_names = (field_names,) if isinstance(field_names, str) else field_names
+
+    @property
+    def field_names(self) -> Tuple[str, ...]:
+        """The keys of the fields considered by this middleware."""
+        return self._field_names
+
+    @abc.abstractmethod
+    def _transform_field_value(self, value):
+        raise NotImplementedError("called abstract method")
+
+    # docstr-coverage: inherited
+    def transform_entry(self, entry: Entry, *args, **kwargs) -> Block:
+        field: Field
+        for field in entry.fields:
+            if field.key in self.field_names:
+                field.value = self._transform_field_value(field.value)
+        return entry
+
+
+def split_comma_separated_list(string):
+    """Split ``string`` on commas and strip the whitespace around every item."""
+    import re
+    # Strip the outer whitespace as well, so " a , b " yields ["a", "b"].
+    return re.split(r"\s*,\s*", string.strip())
+
+
+class SeparateCSVLists(_ListTransformerMiddleware):
+    """Middleware that turns comma-separated field values into lists of strings."""
+
+    # docstr-coverage: inherited
+    @classmethod
+    def metadata_key(cls) -> str:
+        return "separate_lists"
+
+    # docstr-coverage: inherited
+    def _transform_field_value(self, value) -> List[str]:
+        return split_comma_separated_list(value)
diff --git a/bibtexparser/middlewares/sorting_blocks.py b/bibtexparser/middlewares/sorting_blocks.py
@@ -120,3 +120,96 @@ def _sort_key(block: Block):
 
             blocks.sort(key=_sort_key)
             return Library(blocks=blocks)
+
+
+class SortBlocksByYearMonthDayMiddleware(LibraryMiddleware):
+    """Sorts the blocks of a library by year, month and day.
+
+    Blocks are compared on the tuple ``(year, month, day)``. A component is 0 if
+    the block has no such field (e.g. comments), if its value cannot be read as
+    an integer, or if a month/day lies outside 1-12 / 1-31. Month should be an
+    integer (recommended to use MonthIntMiddleware beforehand).
+    Blocks with equal keys keep their relative order.
+
+    :param preserve_comments_on_top: if True (default), comments remain directly
+        above the non-comment block that follows them
+    :param descending: if True, uses descending ordering (ascending by default);
+        applies whether or not comments are preserved on top
+    """
+
+    def __init__(
+        self,
+        preserve_comments_on_top: bool = True,
+        descending: bool = False,
+    ):
+        self._preserve_comments_on_top = preserve_comments_on_top
+        self._descending = descending
+
+        # In-place modification is not yet supported, we make this explicit here,
+        super().__init__(allow_inplace_modification=False)
+
+    @staticmethod
+    def _sort_key(block: Block):
+        """Return the ``(year, month, day)`` sort key of ``block``.
+
+        Missing or unusable components are reported as 0, so blocks without
+        any date information share the key ``(0, 0, 0)``.
+        """
+
+        def _as_int(key, lowest=None, highest=None):
+            """Field ``key`` as int; 0 if absent, not a number or out of range."""
+            try:
+                number = int(block.fields_dict[key].value)
+            except (AttributeError, KeyError, TypeError, ValueError):
+                # No fields_dict (e.g. comments), no such field, or a value
+                # that is not a number (e.g. a month name or "in press")
+                return 0
+            if lowest is not None and not lowest <= number <= highest:
+                # Out-of-range months/days are ignored rather than sorted on
+                return 0
+            return number
+
+        return (
+            _as_int("year"),
+            _as_int("month", 1, 12),
+            _as_int("day", 1, 31),
+        )
+
+    # docstr-coverage: inherited
+    def transform(self, library: Library) -> Library:
+        # Work on copies: in-place modification is disabled for this middleware
+        blocks = deepcopy(library.blocks)
+
+        if not self._preserve_comments_on_top:
+            # Plain sort; the requested direction is honoured in this mode too
+            blocks.sort(key=self._sort_key, reverse=self._descending)
+            return Library(blocks=blocks)
+
+        # We start creating a new list of block_junks (made of comments and entries)
+        block_junks = []
+        current_junk = _BlockJunk()
+        for block in blocks:
+            current_junk.blocks.append(block)
+            # The key of the last block added wins, i.e. the non-comment block
+            # that closes the junk decides where its leading comments end up
+            current_junk.sort_key = self._sort_key(block)
+
+            if not isinstance(block, (ExplicitComment, ImplicitComment)):
+                # We added a non-comment block, hence we finish the junk and
+                # start a new one
+                block_junks.append(current_junk)
+                current_junk = _BlockJunk()
+
+        if current_junk.blocks:
+            # That would be a junk with only comments, but we add it at the end for completeness
+            # (it is sorted on the key computed for its last comment)
+            block_junks.append(current_junk)
+
+        def _junk_key(block_junk):
+            return block_junk.sort_key
+
+        # list.sort is stable, also with reverse=True, so equal keys keep their order
+        block_junks.sort(key=_junk_key, reverse=self._descending)
+        return Library(
+            blocks=[block for block_junk in block_junks for block in block_junk.blocks]
+        )