Skip to content

[Data Liberation] Block markup consumers and producers #2121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,46 +21,26 @@
use League\CommonMark\Extension\Table\TableRow;
use League\CommonMark\Extension\Table\TableSection;

class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter {
const STATE_READY = 'STATE_READY';
const STATE_COMPLETE = 'STATE_COMPLETE';

private $state = self::STATE_READY;
class WP_Markdown_Consumer implements WP_Data_Format_Consumer {
private $root_block;
private $block_stack = array();
private $current_block = null;

private $frontmatter = array();
private $markdown;
private $parsed_blocks = array();
private $block_markup = '';
private $parsed;

public function __construct( $markdown ) {
$this->markdown = $markdown;
}

public function convert() {
if ( self::STATE_READY !== $this->state ) {
return false;
}
$this->convert_markdown_to_blocks();
$this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks );
return true;
}

public function get_all_metadata() {
return $this->frontmatter;
}

public function get_first_meta_value( $key ) {
if ( ! array_key_exists( $key, $this->frontmatter ) ) {
return null;
}
return $this->frontmatter[ $key ][0];
}

public function get_block_markup() {
return $this->block_markup;
public function consume() {
if( ! $this->parsed ) {
$this->convert_markdown_to_blocks();
$this->parsed = new WP_Blocks_With_Metadata( WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks ), $this->frontmatter );
}
return $this->parsed;
}

private function convert_markdown_to_blocks() {
Expand All @@ -82,7 +62,7 @@ private function convert_markdown_to_blocks() {
$document = $parser->parse( $this->markdown );
$this->frontmatter = array();
foreach ( $document->data as $key => $value ) {
// Use an array as a value to comply with the WP_Block_Markup_Converter interface.
// Wrap each value in an array: WP_Blocks_With_Metadata keeps a list of values per metadata key.
$this->frontmatter[ $key ] = array( $value );
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function ( $cursor = null ) use ( $markdown_directory ) {
'allowed_extensions' => array( 'md' ),
'index_file_patterns' => array( '#^index\.md$#' ),
'markup_converter_factory' => function ( $content ) {
return new WP_Markdown_To_Blocks( $content );
return new WP_Markdown_Consumer( $content );
},
)
);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php

require_once __DIR__ . '/WP_Markdown_Importer.php';
require_once __DIR__ . '/WP_Markdown_To_Blocks.php';
require_once __DIR__ . '/WP_Markdown_Consumer.php';

require_once __DIR__ . '/../vendor/autoload.php';
12 changes: 10 additions & 2 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,22 @@
require_once __DIR__ . '/src/wordpress-core-html-api/html5-named-character-references.php';
}

require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Converter.php';
require_once __DIR__ . '/src/Data_Liberation_Exception.php';
require_once __DIR__ . '/src/data-format-consumers/WP_Blocks_With_Metadata.php';
require_once __DIR__ . '/src/data-format-consumers/WP_Data_Format_Consumer.php';
require_once __DIR__ . '/src/data-format-consumers/WP_Markup_Processor_Consumer.php';
require_once __DIR__ . '/src/data-format-consumers/WP_Annotated_Block_Markup_Consumer.php';

require_once __DIR__ . '/src/data-format-producers/WP_Data_Format_Producer.php';
require_once __DIR__ . '/src/data-format-producers/WP_Annotated_Block_Markup_Producer.php';

require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Url_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_URL_In_Text_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_URL.php';
require_once __DIR__ . '/src/block-markup/WP_HTML_To_Blocks.php';

require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_Blocks_With_Metadata_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_EPub_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_WXR_Entity_Reader.php';
Expand Down
3 changes: 1 addition & 2 deletions packages/playground/data-liberation/phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
<testsuites>
<testsuite name="Application Test Suite">
<file>tests/WPHTMLEntityReaderTests.php</file>
<file>tests/WPHTMLToBlocksTests.php</file>
<file>tests/WPWXRReaderTests.php</file>
<file>tests/WPRewriteUrlsTests.php</file>
<file>tests/WPHTMLToBlocksTests.php</file>
<file>tests/WPMarkupProcessorConsumerTests.php</file>
<file>tests/WPHTMLEntityReaderTests.php</file>
<file>tests/WPURLInTextProcessorTests.php</file>
<file>tests/WPBlockMarkupProcessorTests.php</file>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?php

/**
 * Signals an error that occurred during the data liberation process.
 *
 * Declares no members of its own: message, code and previous-exception
 * handling are all inherited from RuntimeException.
 */
class Data_Liberation_Exception extends RuntimeException {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?php

/**
 * HTML processor extended with two helpers: one that reads the inner HTML of
 * the currently matched element and one that skips ahead to its closing tag.
 */
class WP_Data_Liberation_HTML_Processor extends WP_HTML_Processor {

	/**
	 * Returns the raw markup between the currently matched opening tag and
	 * its matching closing tag.
	 *
	 * On success the cursor is moved back to the opening tag it started on.
	 * On failure after scanning has begun, the cursor is left wherever the
	 * scan stopped.
	 *
	 * NOTE(review): elements that never have a closing tag (void elements)
	 * are not special-cased here — confirm how callers handle them.
	 *
	 * @return string|false The inner HTML, or false when the current token is
	 *                      not an opening tag, a bookmark cannot be set, or no
	 *                      matching closing tag is found.
	 */
	public function get_inner_html() {
		if ( '#tag' !== $this->get_token_type() ) {
			return false;
		}

		if ( $this->is_tag_closer() ) {
			return false;
		}

		/*
		 * Bookmark calls are addressed to WP_HTML_Tag_Processor explicitly, so
		 * that class's implementation runs and the bookmark names below can be
		 * looked up verbatim in $this->bookmarks.
		 * NOTE(review): presumably this sidesteps overrides in WP_HTML_Processor — confirm.
		 */
		if ( false === WP_HTML_Tag_Processor::set_bookmark( 'tag-start' ) ) {
			return false;
		}

		/*
		 * Without a matching closer there is no well-defined inner HTML.
		 * Bail out instead of bookmarking whatever token the scan stopped on,
		 * which would silently yield a truncated or unrelated substring.
		 */
		if ( ! $this->skip_to_closer() ) {
			WP_HTML_Tag_Processor::release_bookmark( 'tag-start' );
			return false;
		}

		if ( false === WP_HTML_Tag_Processor::set_bookmark( 'tag-end' ) ) {
			WP_HTML_Tag_Processor::release_bookmark( 'tag-start' );
			return false;
		}

		// Inner HTML starts right after the opening tag and ends where the closing tag begins.
		$inner_html_start  = $this->bookmarks['tag-start']->start + $this->bookmarks['tag-start']->length;
		$inner_html_length = $this->bookmarks['tag-end']->start - $inner_html_start;

		// Rewind to the opening tag, then drop both temporary bookmarks.
		WP_HTML_Tag_Processor::seek( 'tag-start' );
		WP_HTML_Tag_Processor::release_bookmark( 'tag-start' );
		WP_HTML_Tag_Processor::release_bookmark( 'tag-end' );

		return substr(
			$this->html,
			$inner_html_start,
			$inner_html_length
		);
	}

	/**
	 * Advances token by token until the cursor lands on a closing tag whose
	 * depth is exactly one less than the depth at which the scan started.
	 *
	 * @return bool True when such a closing tag was reached, false when
	 *              next_token() stopped yielding tokens first.
	 */
	public function skip_to_closer() {
		$starting_depth = $this->get_current_depth();
		while ( $this->next_token() ) {
			if (
				$this->get_token_type() === '#tag' &&
				$this->is_tag_closer() &&
				$this->get_current_depth() === $starting_depth - 1
			) {
				return true;
			}
		}

		return false;
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?php
/**
 * Splits metadata-annotated block markup into a block markup + metadata pair.
 *
 * The input is parsed with parse_blocks(). Named blocks are re-serialized with
 * serialize_block(). Freeform chunks (blocks without a name) are handed to
 * WP_Markup_Processor_Consumer, and the block markup and metadata it returns
 * are folded into the final result.
 *
 * Example input:
 *
 *     <meta name="post_title" content="My first post">
 *     <!-- wp:paragraph {"className":"my-class"} -->
 *     <p class="my-class">Hello world!</p>
 *     <!-- /wp:paragraph -->
 *
 * Yields block markup along the lines of:
 *
 *     <!-- wp:paragraph {"className":"my-class"} -->
 *     <p class="my-class">Hello world!</p>
 *     <!-- /wp:paragraph -->
 *
 * With the following metadata:
 *
 *     array(
 *         'post_title' => array( 'My first post' ),
 *     )
 */
class WP_Annotated_Block_Markup_Consumer implements WP_Data_Format_Consumer {

	/**
	 * The annotated block markup supplied to the constructor.
	 *
	 * @var string
	 */
	private $original_html;

	/**
	 * Memoized outcome of consume(); not set until the first call.
	 *
	 * @var WP_Blocks_With_Metadata|null
	 */
	private $result;

	/**
	 * @param string $original_html Block markup, optionally annotated with metadata.
	 */
	public function __construct( $original_html ) {
		$this->original_html = $original_html;
	}

	/**
	 * Converts the input into block markup and metadata.
	 *
	 * The conversion runs once; later calls return the same object.
	 *
	 * @return WP_Blocks_With_Metadata The resulting block markup and metadata.
	 */
	public function consume() {
		if ( $this->result ) {
			return $this->result;
		}

		$markup   = '';
		$metadata = array();

		foreach ( parse_blocks( $this->original_html ) as $parsed_block ) {
			// Named blocks are already block markup: serialize them back as they are.
			if ( null !== $parsed_block['blockName'] ) {
				$markup .= serialize_block( $parsed_block ) . "\n";
				continue;
			}

			// Freeform HTML goes through the markup processor consumer.
			$fragment  = WP_HTML_Processor::create_fragment( $parsed_block['innerHTML'] );
			$converted = ( new WP_Markup_Processor_Consumer( $fragment ) )->consume();

			$markup .= $converted->get_block_markup() . "\n";
			// array_merge(): for string keys, values from later chunks replace earlier ones.
			$metadata = array_merge( $metadata, $converted->get_all_metadata() );
		}

		$this->result = new WP_Blocks_With_Metadata( $markup, $metadata );

		return $this->result;
	}
}
Original file line number Diff line number Diff line change
@@ -1,25 +1,39 @@
<?php

/**
* Represents a {Data Format} -> Block Markup + Metadata converter.
*
* Used by the Data Liberation importers to accept data formatted as HTML, Markdown, etc.
* and convert them to WordPress posts.
* Represents the result of a {data format} -> block markup conversion.
*/
interface WP_Block_Markup_Converter {
/**
* Converts the input document specified in the constructor to block markup.
*
* @return bool Whether the conversion was successful.
*/
public function convert();
class WP_Blocks_With_Metadata {

private $block_markup;
private $metadata;

public function __construct( $block_markup, $metadata = array() ) {
$this->block_markup = $block_markup;
$this->metadata = $metadata;
}

/**
* Gets the block markup generated by the convert() method.
* Gets the first metadata value for a given key.
*
* @return string The block markup.
* Example:
*
* Metadata:
* array(
* 'post_title' => array( 'The Name of the Wind' ),
* 'post_author' => array( 'Patrick Rothfuss', 'Betsy Wollheim' )
* )
*
* get_first_meta_value( 'post_author' ) returns 'Patrick Rothfuss'.
*
* @param string $key The metadata key.
* @return mixed The metadata value.
*/
public function get_block_markup();
public function get_first_meta_value( $key ) {
if ( ! array_key_exists( $key, $this->metadata ) ) {
return null;
}
return $this->metadata[ $key ][0];
}

/**
* Gets all the metadata sourced from the input document.
Expand All @@ -35,23 +49,16 @@ public function get_block_markup();
*
* @return array The metadata sourced from the input document.
*/
public function get_all_metadata();
public function get_all_metadata() {
return $this->metadata;
}

/**
* Gets the first metadata value for a given key.
*
* Example:
*
* Metadata:
* array(
* 'post_title' => array( 'The Name of the Wind' ),
* 'post_author' => array( 'Patrick Rothfuss', 'Betsy Wollheim' )
* )
*
* get_first_meta_value( 'post_author' ) returns 'Patrick Rothfuss'.
* Gets the block markup passed to the constructor.
*
* @param string $key The metadata key.
* @return mixed The metadata value.
* @return string The block markup.
*/
public function get_first_meta_value( $key );
public function get_block_markup() {
return $this->block_markup;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php

/**
* Represents a {Data Format} -> Block Markup + Metadata consumer.
*
* Used by the Data Liberation importers to accept data formatted as HTML, Markdown, etc.
* and convert them to WordPress posts.
*
* The conversion result is returned as a WP_Blocks_With_Metadata object, which
* exposes the block markup and the metadata sourced from the input document.
*/
interface WP_Data_Format_Consumer {
/**
* Converts the input document specified in the constructor to block markup.
*
* @return WP_Blocks_With_Metadata The consumed block markup and metadata.
*/
public function consume();
}
Loading
Loading