Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 95 additions & 77 deletions crates/ty_ide/src/docstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@

mod markdown;
mod rest;
#[cfg_attr(
not(test),
expect(dead_code, reason = "wired into reST rendering in a follow-up change")
)]
mod sections;

use indexmap::IndexMap;
Expand Down Expand Up @@ -191,12 +187,20 @@ fn render_markdown(docstring: &str) -> String {
// break out of it, even if they're writing python documentation about markdown
// code fences and are showing off how you can use more than 3 backticks.
const FENCE: &str = "```````````";

let docstring = if rest::may_contain_top_level_field_list(docstring) {
rest::Docstring::parse(docstring).render_markdown()
} else {
Cow::Borrowed(docstring)
};

// TODO: there is a convention that `singletick` is for items that can
// be looked up in-scope while ``multitick`` is for opaque inline code.
// While rendering this we should make note of all the `singletick` locations
// and (possibly in a higher up piece of logic) try to resolve the names for
// cross-linking. (Similar to `TypeDetails` in the type formatting code.)
let mut output = String::new();
let mut inline_markup = rest::InlineMarkupRenderer::default();
let mut first_line = true;
let mut block_indent = 0;
let mut in_doctest = false;
Expand All @@ -210,17 +214,20 @@ fn render_markdown(docstring: &str) -> String {
let trimmed_source_line = line.trim_start_matches(' ');
let mut rendered_line = trimmed_source_line;
let line_indent = line.len() - trimmed_source_line.len();
let mut line_prefix = String::new();
let mut source_line_prefix = String::new();

// First thing's first, add a newline to start the new line
if !first_line {
// If we're not in a codeblock, add trailing space to the line to authentically wrap it
// (Lines ending with two spaces tell markdown to preserve a linebreak)
if !in_any_code {
output.push_str(" ");
line_prefix.push_str(" ");
}
// Only push newlines if we're not scanning for a real line
if starting_literal.is_none() {
output.push('\n');
line_prefix.push('\n');
source_line_prefix.push('\n');
}
}
first_line = false;
Expand All @@ -229,6 +236,10 @@ fn render_markdown(docstring: &str) -> String {
// TODO: we should remove all the trailing blank lines
// (Just pop all trailing `\n` from `output`?)
if in_literal && line_indent < block_indent && !rendered_line.is_empty() {
inline_markup.flush_pending_as_plain(&mut output);
output.push_str(&line_prefix);
line_prefix.clear();
source_line_prefix.clear();
in_literal = false;
in_any_code = false;
block_indent = 0;
Expand All @@ -241,6 +252,10 @@ fn render_markdown(docstring: &str) -> String {
if let Some(literal) = starting_literal
&& !rendered_line.is_empty()
{
inline_markup.flush_pending_as_plain(&mut output);
output.push_str(&line_prefix);
line_prefix.clear();
source_line_prefix.clear();
starting_literal = None;
in_literal = true;
in_any_code = true;
Expand All @@ -253,6 +268,10 @@ fn render_markdown(docstring: &str) -> String {

// If we're not in a codeblock and we see something that signals a doctest, start one
if !in_any_code && rendered_line.starts_with(">>>") {
inline_markup.flush_pending_as_plain(&mut output);
output.push_str(&line_prefix);
line_prefix.clear();
source_line_prefix.clear();
block_indent = line_indent;
in_doctest = true;
in_any_code = true;
Expand All @@ -263,6 +282,7 @@ fn render_markdown(docstring: &str) -> String {

// If we're not in a codeblock and we see a markdown codefence, start one
if !in_any_code && let Some(fence) = markdown::MarkdownFence::find(trimmed_source_line) {
inline_markup.flush_pending_as_plain(&mut output);
// Unlike other blocks we don't need to emit fences because it's already markdown
block_indent = line_indent;
in_any_code = true;
Expand All @@ -281,16 +301,19 @@ fn render_markdown(docstring: &str) -> String {
//
// We "make this work" by stripping the indent on the fences but preserving the
// full indent of the lines between the fences
output.push_str(&line_prefix);
output.push_str(rendered_line);
continue;
// If we're in a markdown code fence and this line seems to terminate it, end the block
} else if let Some(fence) = in_markdown_with_fence
&& fence.is_closed_by(rendered_line)
{
inline_markup.flush_pending_as_plain(&mut output);
in_any_code = false;
block_indent = 0;
in_markdown_with_fence = None;
// Render the line without its indent and move on.
output.push_str(&line_prefix);
output.push_str(rendered_line);
continue;
}
Expand Down Expand Up @@ -395,9 +418,11 @@ fn render_markdown(docstring: &str) -> String {
if !in_any_code {
// TODO: would the raw unicode codepoint be handled *better* or *worse*
// by various IDEs? VS Code handles this approach well, at least.
output.push_str("&nbsp;");
line_prefix.push_str("&nbsp;");
source_line_prefix.push(' ');
} else {
output.push(' ');
line_prefix.push(' ');
source_line_prefix.push(' ');
}
}

Expand All @@ -411,69 +436,15 @@ fn render_markdown(docstring: &str) -> String {
// Things that need to be escaped: underscores and HTML-sensitive characters.
//
// e.g. we want __init__ => \_\_init\_\_ but `__init__` => `__init__`
let escape = |input: &str| {
input
.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
.replace('_', "\\_")
};

let mut in_inline_code = false;
let mut first_chunk = true;
let mut opening_tick_count = 0;
let mut current_tick_count = 0;
for chunk in rendered_line.split('`') {
// First chunk is definitionally not in inline-code and so always plaintext
if first_chunk {
first_chunk = false;
output.push_str(&escape(chunk));
continue;
}
// Not in first chunk, emit the ` between the last chunk and this one
output.push('`');
current_tick_count += 1;

// If we're in an inline block and have enough close-ticks to terminate it, do so.
// TODO: we parse ``hello```there` as (hello)(there) which probably isn't correct
// (definitely not for markdown) but it's close enough for horse grenades in this
// MVP impl. Notably we're verbatime emitting all the `'s so as long as reST and
// markdown agree we're *fine*. The accuracy of this parsing only affects the
// accuracy of where we apply escaping (so we need to misparse and see escapables
// for any of this to matter).
if opening_tick_count > 0 && current_tick_count >= opening_tick_count {
opening_tick_count = 0;
current_tick_count = 0;
in_inline_code = false;
}

// If this chunk is completely empty we're just in a run of ticks, continue
if chunk.is_empty() {
continue;
}

// Ok the chunk is non-empty, our run of ticks is complete
if in_inline_code {
// The previous check for >= open_tick_count didn't trip, so these can't close
// and these ticks will be verbatim rendered in the content
current_tick_count = 0;
} else if current_tick_count > 0 {
// Ok we're now in inline code
opening_tick_count = current_tick_count;
current_tick_count = 0;
in_inline_code = true;
}

// Finally include the content either escaped or not
if in_inline_code {
output.push_str(chunk);
} else {
output.push_str(&escape(chunk));
}
}
// NOTE: explicitly not "flushing" the ticks here.
// We respect however the user closed their inline code.
inline_markup.render_line(
&mut output,
&line_prefix,
&source_line_prefix,
rendered_line,
);
} else if rendered_line.is_empty() {
inline_markup.flush_pending_as_plain(&mut output);
output.push_str(&line_prefix);
if in_doctest {
// This is the end of a doctest
block_indent = 0;
Expand All @@ -482,10 +453,13 @@ fn render_markdown(docstring: &str) -> String {
output.push_str(FENCE);
}
} else {
inline_markup.flush_pending_as_plain(&mut output);
output.push_str(&line_prefix);
// Print the line verbatim, it's in code
output.push_str(rendered_line);
}
}
inline_markup.flush_pending_as_plain(&mut output);
// Flush codeblock
if in_any_code {
output.push('\n');
Expand Down Expand Up @@ -869,6 +843,47 @@ mod tests {
"#);
}

#[test]
fn rest_hyperlinks() {
let _snap = bind_docstring_snapshot_filters();
let docstring = r#"
See `datetime-like <https://numpy.org/doc/stable/reference/arrays.datetime.html>`_ values.
Wrapped links render too: `timezone conversion and
localization
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
#time-zone-handling>`_.
Anonymous links work too: `project docs <https://example.com/docs>`__.
Raw placeholders still escape: <scheme>://<netloc>/<path>.
Link text escapes: `name_with_[chars] & <tags> <https://example.com/a path?q=(value)>`_.

Markdown code fences keep reST links literal:

```text
`not a link <https://example.com>`_
```

Missing underscores stay as inline code: `not a link <https://example.com>`.
"#;

let docstring = Docstring::new(docstring.to_owned());

assert_snapshot!(docstring.render_markdown(), @r"
See [datetime-like](https://numpy.org/doc/stable/reference/arrays.datetime.html) values.<HB>
Wrapped links render too: [timezone conversion and localization](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-zone-handling).<HB>
Anonymous links work too: [project docs](https://example.com/docs).<HB>
Raw placeholders still escape: &lt;scheme&gt;://&lt;netloc&gt;/&lt;path&gt;.<HB>
Link text escapes: [name\_with\_\[chars\] &amp; &lt;tags&gt;](https://example.com/a%20path?q=\(value\)).<HB>
<HB>
Markdown code fences keep reST links literal:<HB>
<HB>
```text
`not a link <https://example.com>`_
```<HB>
<HB>
Missing underscores stay as inline code: `not a link <https://example.com>`.
");
}

// A literal block where the `::` is flush with the paragraph
// and should become `:`
#[test]
Expand Down Expand Up @@ -1972,12 +1987,14 @@ mod tests {
assert_snapshot!(docstring.render_markdown(), @"
This is a function description.<HB>
<HB>
:param str param1: The first parameter description<HB>
:param int param2: The second parameter description<HB>
## Parameters<HB>
`param1` (`str`): The first parameter description<HB>
`param2` (`int`): The second parameter description<HB>
&nbsp;&nbsp;&nbsp;&nbsp;This is a continuation of param2 description.<HB>
:param param3: A parameter without type annotation<HB>
:returns: The return value description<HB>
:rtype: str
`param3`: A parameter without type annotation<HB>
<HB>
## Returns<HB>
`str`: The return value description
");
}

Expand Down Expand Up @@ -2041,8 +2058,9 @@ mod tests {
Args:<HB>
&nbsp;&nbsp;&nbsp;&nbsp;param1 (str): Google-style parameter<HB>
<HB>
:param int param2: reST-style parameter<HB>
:param param3: Another reST-style parameter<HB>
## Parameters<HB>
`param2` (`int`): reST-style parameter<HB>
`param3`: Another reST-style parameter<HB>
<HB>
Parameters<HB>
----------<HB>
Expand Down
Loading
Loading