diff --git a/src/sphinxnotes/snippet/cache.py b/src/sphinxnotes/snippet/cache.py index cc2ed0a..78a2efa 100644 --- a/src/sphinxnotes/snippet/cache.py +++ b/src/sphinxnotes/snippet/cache.py @@ -93,7 +93,7 @@ def post_purge(self, key: DocID, value: list[Item]) -> None: def get_by_index_id(self, key: IndexID) -> Item | None: """Like get(), but use IndexID as key.""" doc_id, item_index = self.index_id_to_doc_id.get(key, (None, None)) - if not doc_id or not item_index: + if not doc_id or item_index is None: return None return self[doc_id][item_index] @@ -105,4 +105,4 @@ def gen_index_id(self) -> str: def stringify(self, key: DocID, value: list[Item]) -> str: """Overwrite PDict.stringify.""" - return key[1] + return key[1] # docname diff --git a/src/sphinxnotes/snippet/cli.py b/src/sphinxnotes/snippet/cli.py index c2f2d0e..a52b17a 100644 --- a/src/sphinxnotes/snippet/cli.py +++ b/src/sphinxnotes/snippet/cli.py @@ -60,9 +60,9 @@ def main(argv: list[str] = sys.argv[1:]): formatter_class=HelpFormatter, epilog=dedent(""" snippet tags: - d (document) a reST document - s (section) a reST section - c (code) snippet with code blocks + d (document) a document + s (section) a section + c (code) a code block * (any) wildcard for any snippet"""), ) parser.add_argument( @@ -140,7 +140,12 @@ def main(argv: list[str] = sys.argv[1:]): '--text', '-t', action='store_true', - help='get source reStructuredText of snippet', + help='get text representation of snippet', + ) + getparser.add_argument( + '--src', + action='store_true', + help='get source text of snippet', ) getparser.add_argument( '--url', @@ -273,7 +278,9 @@ def p(*args, **opts): p('no such index ID', file=sys.stderr) sys.exit(1) if args.text: - p('\n'.join(item.snippet.rst)) + p('\n'.join(item.snippet.text)) + if args.src: + p('\n'.join(item.snippet.source)) if args.docname: p(item.snippet.docname) if args.file: diff --git a/src/sphinxnotes/snippet/ext.py b/src/sphinxnotes/snippet/ext.py index b320ef5..7a863cc 100644 --- 
a/src/sphinxnotes/snippet/ext.py +++ b/src/sphinxnotes/snippet/ext.py @@ -26,7 +26,7 @@ from collections.abc import Iterator from .config import Config -from .snippets import Snippet, WithTitle, Document, Section +from .snippets import Snippet, WithTitle, Document, Section, Code from .picker import pick from .cache import Cache, Item from .keyword import Extractor @@ -45,53 +45,38 @@ def extract_tags(s: Snippet) -> str: tags += 'd' elif isinstance(s, Section): tags += 's' + elif isinstance(s, Code): + tags += 'c' return tags def extract_excerpt(s: Snippet) -> str: if isinstance(s, Document) and s.title is not None: - return '<' + s.title.text + '>' + return '<' + s.title + '>' elif isinstance(s, Section) and s.title is not None: - return '[' + s.title.text + ']' + return '[' + s.title + ']' + elif isinstance(s, Code): + return '`' + (s.lang + ':').ljust(8, ' ') + ' ' + s.desc + '`' return '' def extract_keywords(s: Snippet) -> list[str]: keywords = [s.docname] - # TODO: Deal with more snippet if isinstance(s, WithTitle) and s.title is not None: - keywords.extend(extractor.extract(s.title.text, strip_stopwords=False)) + keywords.extend(extractor.extract(s.title, strip_stopwords=False)) + if isinstance(s, Code): + keywords.extend(extractor.extract(s.desc, strip_stopwords=False)) return keywords -def is_document_matched( - pats: dict[str, list[str]], docname: str -) -> dict[str, list[str]]: - """Whether the docname matched by given patterns pats""" - new_pats = {} - for tag, ps in pats.items(): +def _get_document_allowed_tags(pats: dict[str, list[str]], docname: str) -> str: + """Return the tags of snippets that are allowed to be picked from the document.""" + allowed_tags = '' + for tags, ps in pats.items(): for pat in ps: if re.match(pat, docname): - new_pats.setdefault(tag, []).append(pat) - return new_pats - - -def is_snippet_matched(pats: dict[str, list[str]], s: [Snippet], docname: str) -> bool: - """Whether the snippet's tags and docname matched by given 
patterns pats""" - if '*' in pats: # Wildcard - for pat in pats['*']: - if re.match(pat, docname): - return True - - not_in_pats = True - for k in extract_tags(s): - if k not in pats: - continue - not_in_pats = False - for pat in pats[k]: - if re.match(pat, docname): - return True - return not_in_pats + allowed_tags += tags + return allowed_tags def on_config_inited(app: Sphinx, appcfg: SphinxConfig) -> None: @@ -113,6 +98,7 @@ def on_env_get_outdated( removed: set[str], ) -> list[str]: # Remove purged indexes and snippetes from db + assert cache is not None for docname in removed: del cache[(app.config.project, docname)] return [] @@ -126,15 +112,16 @@ def on_doctree_resolved(app: Sphinx, doctree: nodes.document, docname: str) -> N ) return - pats = is_document_matched(app.config.snippet_patterns, docname) - if len(pats) == 0: - logger.debug('[snippet] skip picking because %s is not matched', docname) + allowed_tags = _get_document_allowed_tags(app.config.snippet_patterns, docname) + if not allowed_tags: + logger.debug('[snippet] skip picking: no tag allowed for document %s', docname) return doc = [] snippets = pick(app, doctree, docname) for s, n in snippets: - if not is_snippet_matched(pats, s, docname): + # FIXME: Better filter logic. The '*' tag is a wildcard that allows any snippet. + if '*' not in allowed_tags and extract_tags(s) not in allowed_tags: continue tpath = [x.astext() for x in titlepath.resolve(app.env, docname, n)] if isinstance(s, Section): @@ -162,6 +149,7 @@ def on_doctree_resolved(app: Sphinx, doctree: nodes.document, docname: str) -> N def on_builder_finished(app: Sphinx, exception) -> None: + assert cache is not None cache.dump() diff --git a/src/sphinxnotes/snippet/integration/binding.nvim b/src/sphinxnotes/snippet/integration/binding.nvim index 6f05c5f..0414214 100644 --- a/src/sphinxnotes/snippet/integration/binding.nvim +++ b/src/sphinxnotes/snippet/integration/binding.nvim @@ -11,7 +11,7 @@ function! g:SphinxNotesSnippetListAndView() function!
s:CallView(selection) call g:SphinxNotesSnippetView(s:SplitID(a:selection)) endfunction - call g:SphinxNotesSnippetList(function('s:CallView'), 'ds') + call g:SphinxNotesSnippetList(function('s:CallView'), '*') endfunction " https://github.com/anhmv/vim-float-window/blob/master/plugin/float-window.vim @@ -40,7 +40,7 @@ function! g:SphinxNotesSnippetView(id) " Press enter to return nmap :call nvim_win_close(g:sphinx_notes_snippet_win, v:true) - let cmd = [s:snippet, 'get', '--text', a:id] + let cmd = [s:snippet, 'get', '--src', a:id] call append(line('$'), ['.. hint:: Press to return']) execute '$read !' . '..' execute '$read !' . join(cmd, ' ') diff --git a/src/sphinxnotes/snippet/integration/binding.sh b/src/sphinxnotes/snippet/integration/binding.sh index 474b382..f831d58 100644 --- a/src/sphinxnotes/snippet/integration/binding.sh +++ b/src/sphinxnotes/snippet/integration/binding.sh @@ -6,7 +6,7 @@ # :Version: 20240828 function snippet_view() { - selection=$(snippet_list --tags ds) + selection=$(snippet_list) [ -z "$selection" ] && return # Make sure we have $PAGER @@ -18,7 +18,7 @@ function snippet_view() { fi fi - echo "$SNIPPET get --text $selection | $PAGER" + echo "$SNIPPET get --src $selection | $PAGER" } function snippet_edit() { diff --git a/src/sphinxnotes/snippet/picker.py b/src/sphinxnotes/snippet/picker.py index 16b2941..667bcc1 100644 --- a/src/sphinxnotes/snippet/picker.py +++ b/src/sphinxnotes/snippet/picker.py @@ -15,7 +15,7 @@ from sphinx.util import logging -from .snippets import Snippet, Section, Document +from .snippets import Snippet, Section, Document, Code if TYPE_CHECKING: from sphinx.application import Sphinx @@ -25,81 +25,71 @@ def pick( app: Sphinx, doctree: nodes.document, docname: str -) -> list[tuple[Snippet, nodes.section]]: +) -> list[tuple[Snippet, nodes.Element]]: """ - Pick snippets from document, return a list of snippet and the section - it belongs to. 
+ Pick snippets from document, return a list of snippet and the related node. + + As :class:`Snippet` can not hold any refs to doctree, we additionly returns + the related nodes here. To ensure the caller can back reference to original + document node and do more things (e.g. generate title path). """ # FIXME: Why doctree.source is always None? if not doctree.attributes.get('source'): - logger.debug('Skipped document without source') + logger.debug('Skip document without source') return [] metadata = app.env.metadata.get(docname, {}) if 'no-search' in metadata or 'nosearch' in metadata: - logger.debug('Skipped document with nosearch metadata') + logger.debug('Skip document with nosearch metadata') return [] - snippets: list[tuple[Snippet, nodes.section]] = [] - - # Pick document - toplevel_section = doctree.next_node(nodes.section) - if toplevel_section: - snippets.append((Document(doctree), toplevel_section)) - else: - logger.warning('can not pick document without child section: %s', doctree) - - # Pick sections - section_picker = SectionPicker(doctree) - doctree.walkabout(section_picker) - snippets.extend(section_picker.sections) + # Walk doctree and pick snippets. + picker = SnippetPicker(doctree) + doctree.walkabout(picker) - return snippets + return picker.snippets -class SectionPicker(nodes.SparseNodeVisitor): +class SnippetPicker(nodes.SparseNodeVisitor): """Node visitor for picking snippets from document.""" - #: Constant list of unsupported languages (:class:`pygments.lexers.Lexer`) - UNSUPPORTED_LANGUAGES: list[str] = ['default'] + #: List of picked snippets and the section it belongs to + snippets: list[tuple[Snippet, nodes.Element]] - #: List of picked section snippets and the section it belongs to - sections: list[tuple[Section, nodes.section]] + #: Stack of nested sections. 
+ _sections: list[nodes.section] - _section_has_code_block: bool - _section_level: int - - def __init__(self, document: nodes.document) -> None: - super().__init__(document) - self.sections = [] - self._section_has_code_block = False - self._section_level = 0 + def __init__(self, doctree: nodes.document) -> None: + super().__init__(doctree) + self.snippets = [] + self._sections = [] ################### # Visitor methods # ################### def visit_literal_block(self, node: nodes.literal_block) -> None: - if node['language'] in self.UNSUPPORTED_LANGUAGES: + try: + code = Code(node) + except ValueError as e: + logger.debug(f'skip {node}: {e}') raise nodes.SkipNode - self._has_code_block = True + self.snippets.append((code, node)) def visit_section(self, node: nodes.section) -> None: - self._section_level += 1 + self._sections.append(node) def depart_section(self, node: nodes.section) -> None: - self._section_level -= 1 - self._has_code_block = False + section = self._sections.pop() + assert section == node # Skip non-leaf section without content if self._is_empty_non_leaf_section(node): return - # Skip toplevel section, we generate :class:`Document` for it - if self._section_level == 0: - return - - # TODO: code block - self.sections.append((Section(node), node)) + if len(self._sections) == 0: + self.snippets.append((Document(self.document), node)) + else: + self.snippets.append((Section(node), node)) def unknown_visit(self, node: nodes.Node) -> None: pass # Ignore any unknown node diff --git a/src/sphinxnotes/snippet/snippets.py b/src/sphinxnotes/snippet/snippets.py index 133e91c..f14661a 100644 --- a/src/sphinxnotes/snippet/snippets.py +++ b/src/sphinxnotes/snippet/snippets.py @@ -12,6 +12,8 @@ from typing import TYPE_CHECKING import itertools from os import path +import sys +from pygments.lexers.shell import BashSessionLexer from docutils import nodes @@ -22,24 +24,35 @@ class Snippet(object): """ Snippet is structured fragments extracted from a single Sphinx 
document - (can also be said to be a reStructuredText file). + (usually, also a single reStructuredText file). :param nodes: nodes of doctree that make up this snippet. + + .. warning:: + + Snippet will be persisted to disk via pickle, to keep it simple, + it CAN NOT holds reference to any doctree ``nodes`` + (or even any non-std module). """ #: docname where the snippet is located, can be referenced by # :rst:role:`doc`. docname: str - #: Absolute path of the source file. + #: Absolute path to the source file. file: str - #: Line number range of snippet, in the source file which is left closed - #: and right opened. + #: Line number range of source file (:attr:`Snippet.file`), + #: left closed and right opened. lineno: tuple[int, int] - #: The original reStructuredText of snippet - rst: list[str] + #: The source text read from source file (:attr:`Snippet.file`), + # in Markdown or reStructuredText. + source: list[str] + + #: Text representation of the snippet, usually generated form + # :meth:`nodes.Element.astext`. + text: list[str] #: The possible identifier key of snippet, which is picked from nodes' #: (or nodes' parent's) `ids attr`_. @@ -47,28 +60,42 @@ class Snippet(object): #: .. 
_ids attr: https://docutils.sourceforge.io/docs/ref/doctree.html#ids refid: str | None - def __init__(self, *nodes: nodes.Node) -> None: + def __init__(self, *nodes: nodes.Element) -> None: assert len(nodes) != 0 - env: BuildEnvironment = nodes[0].document.settings.env - self.file = nodes[0].source - self.docname = env.path2doc(self.file) + env: BuildEnvironment = nodes[0].document.settings.env # type: ignore - lineno = [float('inf'), -float('inf')] + file, docname = None, None + for node in nodes: + if (src := node.source) and path.exists(src): + file = src + docname = env.path2doc(file) + break + if not file or not docname: + raise ValueError(f'Nodes {nodes} lacks source file or docname') + self.file = file + self.docname = docname + + lineno = [sys.maxsize, -sys.maxsize] for node in nodes: if not node.line: continue # Skip node that have None line, I dont know why lineno[0] = min(lineno[0], _line_of_start(node)) lineno[1] = max(lineno[1], _line_of_end(node)) - self.lineno = lineno + self.lineno = (lineno[0], lineno[1]) - lines = [] + source = [] with open(self.file, 'r') as f: start = self.lineno[0] - 1 stop = self.lineno[1] - 1 for line in itertools.islice(f, start, stop): - lines.append(line.strip('\n')) - self.rst = lines + source.append(line.strip('\n')) + self.source = source + + text = [] + for node in nodes: + text.extend(node.astext().split('\n')) + self.text = text # Find exactly one ID attr in nodes self.refid = None @@ -85,40 +112,63 @@ def __init__(self, *nodes: nodes.Node) -> None: break -class Text(Snippet): - #: Text of snippet - text: str - - def __init__(self, node: nodes.Node) -> None: - super().__init__(node) - self.text = node.astext() - - -class Code(Text): +class Code(Snippet): #: Language of code block - language: str - #: Caption of code block - caption: str | None + lang: str + #: Description of code block, usually the text of preceding paragraph + desc: str def __init__(self, node: nodes.literal_block) -> None: assert
isinstance(node, nodes.literal_block) - super().__init__(node) - self.language = node['language'] - self.caption = node.get('caption') - -class Title(Text): - def __init__(self, node: nodes.title) -> None: - assert isinstance(node, nodes.title) - super().__init__(node) + self.lang = node['language'] + if self.lang not in BashSessionLexer.aliases: # TODO: support more language + raise ValueError( + f'Language {self.lang} is not supported', + ) + + self.desc = '' + # Use the preceding paragraph as descritpion. We usually write some + # descritpions before a code block. For example, The ``::`` syntax is + # a common way to create code block:: + # + # | Foo:: | + # | | Foo: + # | Bar | + # | | Bar + # + # In this case, the paragraph "Foo:" is the descritpion of the code block. + # This convention also applies to the code, code-block, sourcecode directive. + if isinstance(para := node.previous_sibling(), nodes.paragraph): + # For better display, the trailing colon is removed. + # TODO: https://en.wikipedia.org/wiki/Colon_(punctuation)#Computing + self.desc += para.astext().replace('\n', ' ').rstrip(':::︁︓﹕') + if caption := node.get('caption'): + # Use caption as descritpion. + # All of code-block, sourcecode and code directives have caption option. + # https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block + self.desc += caption + if not self.desc: + raise ValueError( + f'Node f{node} lacks description: a preceding paragraph or a caption' + ) + + if isinstance(para, nodes.paragraph): + # If we have a paragraph preceding code block, include it. + super().__init__(para, node) + # Fixup text field, it should be pure code. 
+ self.text = node.astext().split('\n') + else: + super().__init__(node) class WithTitle(object): - title: Title | None + title: str - def __init__(self, node: nodes.Node) -> None: - title_node = node.next_node(nodes.title) - self.title = Title(title_node) if title_node else None + def __init__(self, node: nodes.Element) -> None: + if not (title := node.next_node(nodes.title)): + raise ValueError(f'Node {node} lacks title') + self.title = title.astext() class Section(Snippet, WithTitle): @@ -169,7 +219,7 @@ def _line_of_start(node: nodes.Node) -> int: return node.line -def _line_of_end(node: nodes.Node) -> int | None: +def _line_of_end(node: nodes.Node) -> int: next_node = node.next_node(descend=False, siblings=True, ascend=True) while next_node: if next_node.line: @@ -184,7 +234,7 @@ def _line_of_end(node: nodes.Node) -> int | None: siblings=True, ) # No line found, return the max line of source file - if node.source: + if node.source and path.exists(node.source): with open(node.source) as f: - return sum(1 for line in f) + return sum(1 for _ in f) raise AttributeError('None source attr of node %s' % node) diff --git a/src/sphinxnotes/snippet/utils/titlepath.py b/src/sphinxnotes/snippet/utils/titlepath.py index 1e827d3..eaa6bc3 100644 --- a/src/sphinxnotes/snippet/utils/titlepath.py +++ b/src/sphinxnotes/snippet/utils/titlepath.py @@ -14,17 +14,16 @@ from docutils import nodes if TYPE_CHECKING: - from sphinx.enviornment import BuilderEnviornment + from sphinx.environment import BuildEnvironment def resolve( - env: BuilderEnviornment, docname: str, node: nodes.Node + env: BuildEnvironment, docname: str, node: nodes.Element ) -> list[nodes.title]: return resolve_section(node) + resolve_document(env, docname) -def resolve_section(node: nodes.section) -> list[nodes.title]: - # FIXME: doc is None +def resolve_section(node: nodes.Element) -> list[nodes.title]: titlenodes = [] while node: if len(node) > 0 and isinstance(node[0], nodes.title): @@ -33,10 +32,8 @@ def
resolve_section(node: nodes.section) -> list[nodes.title]: return titlenodes -def resolve_document(env: BuilderEnviornment, docname: str) -> list[nodes.title]: - """ - .. note:: Title of document itself does not included in the returned list - """ +def resolve_document(env: BuildEnvironment, docname: str) -> list[nodes.title]: + """NOTE: Title of document itself is not included in the returned list""" titles = [] master_doc = env.config.master_doc v = docname.split('/')