Skip to content

Commit 869f6bd

Browse files
authored
[Data Liberation] Move Markdown importer to a separate package (#2093)
Moves the Markdown importer to a `data-liberation-markdown` package so that it can be shipped as a separate `.phar` file and downloaded only when needed.

## Testing instructions

This only moves code around. To test, confirm the CI PHP unit tests keep working.

A part of:

* #2080
* #1894
1 parent 27d799a commit 869f6bd

File tree

6 files changed

+114
-352
lines changed

6 files changed

+114
-352
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
<?php
2+
3+
use WordPress\Filesystem\WP_Filesystem;
4+
5+
/**
 * Stream importer specialization for a directory tree of Markdown files.
 *
 * On top of WP_Stream_Importer it adds:
 *
 * - a factory that wires a WP_Directory_Tree_Entity_Reader restricted to
 *   `.md` files and converting their content with WP_Markdown_To_Blocks,
 * - validation of the Markdown-specific options,
 * - rewriting of local asset URLs into `file://` URLs,
 * - the rule deciding which URLs in the block markup count as assets.
 */
class WP_Markdown_Importer extends WP_Stream_Importer {

	/**
	 * Creates an importer that reads its entities from a Markdown directory.
	 *
	 * @param string $markdown_directory Root directory passed to the entity reader as `root_dir`.
	 * @param array  $options            Importer options; validated by parse_options().
	 * @param mixed  $cursor             Optional cursor, forwarded as-is to WP_Markdown_Importer::create().
	 * @return mixed Whatever WP_Markdown_Importer::create() returns.
	 */
	public static function create_for_markdown_directory( $markdown_directory, $options = array(), $cursor = null ) {
		return WP_Markdown_Importer::create(
			function ( $cursor = null ) use ( $markdown_directory ) {
				// @TODO: Handle $cursor
				return new WP_Directory_Tree_Entity_Reader(
					new WP_Filesystem(),
					array(
						'root_dir'                 => $markdown_directory,
						'first_post_id'            => 1,
						'allowed_extensions'       => array( 'md' ),
						'index_file_patterns'      => array( '#^index\.md$#' ),
						'markup_converter_factory' => function ( $content ) {
							return new WP_Markdown_To_Blocks( $content );
						},
					)
				);
			},
			$options,
			$cursor
		);
	}

	/**
	 * Validates and normalizes the Markdown-specific importer options.
	 *
	 * Required options:
	 *
	 * - `source_site_url` – also copied to `default_source_site_url`.
	 * - `local_markdown_assets_root` – must be an existing directory; any
	 *   trailing slashes are stripped.
	 *
	 * @param array $options Raw options.
	 * @return mixed False when a required option is missing or invalid,
	 *               otherwise the result of parent::parse_options().
	 */
	protected static function parse_options( $options ) {
		if ( ! isset( $options['source_site_url'] ) ) {
			_doing_it_wrong( __METHOD__, 'The source_site_url option is required.', '__WP_VERSION__' );
			return false;
		}
		$options['default_source_site_url'] = $options['source_site_url'];

		// The messages below name the exact option key that is checked so the
		// caller knows which key to set.
		if ( ! isset( $options['local_markdown_assets_root'] ) ) {
			_doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option is required.', '__WP_VERSION__' );
			return false;
		}
		if ( ! is_dir( $options['local_markdown_assets_root'] ) ) {
			_doing_it_wrong( __METHOD__, 'The local_markdown_assets_root option must point to a directory.', '__WP_VERSION__' );
			return false;
		}
		// Stored without a trailing slash; rewrite_attachment_url() appends
		// a path that always starts with "/".
		$options['local_markdown_assets_root'] = rtrim( $options['local_markdown_assets_root'], '/' );

		return parent::parse_options( $options );
	}

	/**
	 * Maps URLs of local Markdown assets to `file://` URLs before delegating
	 * to the parent implementation.
	 *
	 * @param string $raw_url      URL as found in the imported content.
	 * @param mixed  $context_path Forwarded unchanged to the parent implementation.
	 * @return mixed Result of parent::rewrite_attachment_url().
	 */
	protected function rewrite_attachment_url( string $raw_url, $context_path = null ) {
		/**
		 * URLs starting with the `local_markdown_assets_url_prefix` option
		 * (e.g. `@site` in Docusaurus docs) are referring to local files.
		 * Let's convert them to file:// URLs.
		 */
		if (
			isset( $this->options['local_markdown_assets_url_prefix'] ) &&
			str_starts_with( $raw_url, $this->options['local_markdown_assets_url_prefix'] )
		) {
			// @TODO: Source the file from the current input stream if we can.
			// This would allow stream-importing zipped Markdown and WXR directory
			// structures.
			// Maybe for v1 we could just support importing them from ZIP files
			// that are already downloaded and available in a local directory just
			// to avoid additional data transfer and the hurdle with implementing
			// multiple range requests.
			$relative_asset_path = substr( $raw_url, strlen( $this->options['local_markdown_assets_url_prefix'] ) );
			// Normalize to exactly one leading slash.
			$relative_asset_path = '/' . ltrim( $relative_asset_path, '/' );
			$raw_url             = (
				'file://' .
				$this->options['local_markdown_assets_root'] .
				$relative_asset_path
			);
		}

		return parent::rewrite_attachment_url( $raw_url, $context_path );
	}

	/**
	 * When processing Markdown, we'll download all the images
	 * referenced in the image tags.
	 *
	 * @TODO: Actually, should we?
	 * @TODO: How can we process the videos?
	 * @TODO: What other asset types are there?
	 *
	 * @param WP_Block_Markup_Url_Processor $p Processor positioned at the matched URL.
	 * @return bool True only for the `src` attribute of an `IMG` tag.
	 */
	protected function url_processor_matched_asset_url( WP_Block_Markup_Url_Processor $p ) {
		return (
			$p->get_tag() === 'IMG' &&
			$p->get_inspected_attribute_name() === 'src'
		);
	}
}

packages/playground/data-liberation/src/markdown-api/WP_Markdown_To_Blocks.php renamed to packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php

+17-36
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@
2121
use League\CommonMark\Extension\Table\TableRow;
2222
use League\CommonMark\Extension\Table\TableSection;
2323

24-
25-
class WP_Markdown_To_Blocks {
24+
class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter {
2625
const STATE_READY = 'STATE_READY';
2726
const STATE_COMPLETE = 'STATE_COMPLETE';
2827

@@ -40,19 +39,26 @@ public function __construct( $markdown ) {
4039
$this->markdown = $markdown;
4140
}
4241

43-
public function parse() {
42+
public function convert() {
4443
if ( self::STATE_READY !== $this->state ) {
4544
return false;
4645
}
4746
$this->convert_markdown_to_blocks();
48-
$this->block_markup = self::convert_blocks_to_markup( $this->parsed_blocks );
47+
$this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks );
4948
return true;
5049
}
5150

52-
public function get_frontmatter() {
51+
public function get_all_metadata() {
5352
return $this->frontmatter;
5453
}
5554

55+
public function get_meta_value( $key ) {
56+
if ( ! array_key_exists( $key, $this->frontmatter ) ) {
57+
return null;
58+
}
59+
return $this->frontmatter[ $key ][0];
60+
}
61+
5662
public function get_block_markup() {
5763
return $this->block_markup;
5864
}
@@ -74,7 +80,11 @@ private function convert_markdown_to_blocks() {
7480
$parser = new MarkdownParser( $environment );
7581

7682
$document = $parser->parse( $this->markdown );
77-
$this->frontmatter = $document->data;
83+
$this->frontmatter = [];
84+
foreach( $document->data as $key => $value ) {
85+
// Use an array as a value to comply with the WP_Block_Markup_Converter interface.
86+
$this->frontmatter[ $key ] = [$value];
87+
}
7888

7989
$walker = $document->walker();
8090
while ( true ) {
@@ -163,7 +173,7 @@ private function convert_markdown_to_blocks() {
163173
'content' => '<pre class="wp-block-code"><code>' . trim( str_replace( "\n", '<br>', htmlspecialchars( $node->getLiteral() ) ) ) . '</code></pre>',
164174
)
165175
);
166-
if ( $node->getInfo() ) {
176+
if ( method_exists( $node, 'getInfo' ) && $node->getInfo() ) {
167177
$this->current_block->attrs['language'] = preg_replace( '/[ \t\r\n\f].*/', '', $node->getInfo() );
168178
}
169179
break;
@@ -339,35 +349,6 @@ private function convert_markdown_to_blocks() {
339349
$this->parsed_blocks = $this->root_block->inner_blocks;
340350
}
341351

342-
private static function convert_blocks_to_markup( $blocks ) {
343-
$block_markup = '';
344-
345-
foreach ( $blocks as $block ) {
346-
// Start of block comment
347-
$comment = '<!-- -->';
348-
$p = new WP_HTML_Tag_Processor( $comment );
349-
$p->next_token();
350-
$attrs = $block->attrs;
351-
$content = $block->attrs['content'] ?? '';
352-
unset( $attrs['content'] );
353-
$encoded_attrs = json_encode( $attrs );
354-
if ( $encoded_attrs === '[]' ) {
355-
$encoded_attrs = '';
356-
}
357-
$p->set_modifiable_text( " wp:{$block->block_name} " . $encoded_attrs . ' ' );
358-
$open_comment = $p->get_updated_html();
359-
360-
$block_markup .= $open_comment . "\n";
361-
$block_markup .= $content . "\n";
362-
$block_markup .= self::convert_blocks_to_markup( $block->inner_blocks );
363-
364-
// End of block comment
365-
$block_markup .= "<!-- /wp:{$block->block_name} -->\n";
366-
}
367-
368-
return $block_markup;
369-
}
370-
371352
private function append_content( $content ) {
372353
if ( ! isset( $this->current_block->attrs['content'] ) ) {
373354
$this->current_block->attrs['content'] = '';
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<?php
2+
3+
require_once __DIR__ . '/WP_Markdown_Importer.php';
4+
require_once __DIR__ . '/WP_Markdown_To_Blocks.php';
5+
6+
require_once __DIR__ . '/../vendor/autoload.php';

packages/playground/data-liberation/bootstrap.php

-14
Original file line numberDiff line numberDiff line change
@@ -67,20 +67,6 @@
6767

6868
require_once __DIR__ . '/src/utf8_decoder.php';
6969

70-
/**
71-
* Require conditionally – these files are missing from the data-liberation-core.phar
72-
* to reduce the bundle size (we'd need to include a large markdown parser and its
73-
* dependencies, too).
74-
*
75-
* @TODO: Build a separate "data-liberation-markdown" phar file plugin with the Markdown
76-
* importing functionality.
77-
*/
78-
if ( file_exists( __DIR__ . '/src/markdown-api/WP_Markdown_To_Blocks.php' ) ) {
79-
require_once __DIR__ . '/src/markdown-api/WP_Markdown_To_Blocks.php';
80-
require_once __DIR__ . '/src/markdown-api/WP_Markdown_Directory_Tree_Reader.php';
81-
require_once __DIR__ . '/src/markdown-api/WP_Markdown_HTML_Processor.php';
82-
}
83-
8470
// When running in Playground, the composer autoloader script sees CLI SAPI and
8571
// tries to use the STDERR, STDIN, and STDOUT constants.
8672
// @TODO: Don't use the "cli" SAPI string and don't allow composer to run platform checks.

0 commit comments

Comments
 (0)