Skip to content

Commit b9f5edb

Browse files
authored
[Data Liberation] Build markdown importer as phar (#2094)
Builds data-liberation-markdown.phar.gz (200KB) to enable downloading the Markdown importer only when needed instead of on every page load. A part of: * #2080 * #1894 ## Testing instructions Run `nx build playground-data-liberation-markdown`, confirm it finished without errors. A smoke test of the built phar file is included in the build command.
1 parent 869f6bd commit b9f5edb

20 files changed

+251
-85
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
<?php
2+
3+
use KevinGH\Box\Compactor\Compactor;
4+
5+
class DataLiberationBoxCompactor implements Compactor
{
    /**
     * Decides whether a file keeps its contents or is emptied out.
     *
     * Contents are preserved only for `.php`, `.json` and `.lock` files whose
     * path contains none of the excluded fragments and whose contents do not
     * carry Composer's platform-requirement message. In every other case an
     * empty string is returned.
     *
     * @param string $file     Path of the file being processed.
     * @param string $contents Current contents of that file.
     * @return string The unchanged contents, or '' when the file is filtered out.
     */
    public function compact(string $file, string $contents): string
    {
        $hasKeptExtension = preg_match('/\.(php|json|lock)$/', $file);
        if (!$hasKeptExtension) {
            return '';
        }

        $excludedPathFragments = [
            'platform_check.php',
            '/tests/',
            '/.git/',
            '/.github/',
            '/bin/',
        ];
        foreach ($excludedPathFragments as $fragment) {
            if (str_contains($file, $fragment)) {
                return '';
            }
        }

        // Drop any file that carries Composer's platform-requirement error message.
        $mentionsPlatformRequirement = str_contains($contents, 'Your Composer dependencies require ');

        return $mentionsPlatformRequirement ? '' : $contents;
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<?php
2+
3+
/**
 * Runs Box with DataLiberationBoxCompactor loaded up front.
 *
 * The BOX_BASE_PATH environment variable must hold the path of the `box`
 * executable. Its directory is used to locate both the autoloader one level
 * above it and the `box` entry point itself.
 */
$box_executable = getenv('BOX_BASE_PATH');
if ($box_executable === false || $box_executable === '') {
    // Without this check the requires below fail with a misleading "/../autoload.php" path.
    fwrite(STDERR, "The BOX_BASE_PATH environment variable must point to the box executable.\n");
    exit(1);
}

$box_base_path = dirname($box_executable);
require_once $box_base_path . '/../autoload.php';
require_once __DIR__ . '/DataLiberationBoxCompactor.php';
require_once $box_base_path . '/box';
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<?php
2+
require_once __DIR__ . '/../../bootstrap.php';
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<?php
2+
3+
require_once __DIR__ . '/../../../data-liberation/dist/data-liberation-core.phar.gz';
require_once __DIR__ . '/../../dist/data-liberation-markdown.phar';

/**
 * None of this will actually try to parse a file or import
 * any data. We're just making sure the importer can
 * be created without throwing an exception.
 */
$markdown_root = __DIR__ . '/markdown-test-data';
$importer      = WP_Markdown_Importer::create_for_markdown_directory(
    $markdown_root,
    array(
        'source_site_url'                  => 'file://' . $markdown_root,
        'local_markdown_assets_root'       => $markdown_root,
        'local_markdown_assets_url_prefix' => '@site/',
    ),
    // No import is being resumed here, so no cursor is passed. The previous
    // `$import['cursor'] ?? null` read a variable this script never defines.
    null
);

echo 'Markdown importer created!';
23+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?php
2+
3+
/**
 * Post-processes a Box-built .phar archive in place.
 *
 * Usage: php -d phar.readonly=0 truncate-composer-checks.php <path-to-phar>
 */
if (!isset($argv[1]) || $argv[1] === '') {
    fwrite(STDERR, "Usage: php -d phar.readonly=0 truncate-composer-checks.php <path-to-phar>\n");
    exit(1);
}

$pharPath = $argv[1];
$phar     = new Phar($pharPath);
$phar->startBuffering();

/**
 * Box includes an autoloader with a fixed name in every build.
 * However, we want to load two .phar files built with Box, not
 * one. Unfortunately, this yields an error:
 *
 *     Cannot declare class ComposerAutoloaderInitHumbugBox451
 *
 * Therefore, we're giving all the HumbugBox classes a unique suffix.
 */
$autoloadSuffix = substr(md5(__FILE__), 0, 8);
$classPrefixes  = [
    'InitHumbugBox',
];

// Loop-invariant: the prefix stripped from every entry path to make it archive-relative.
$pharRootPrefix = $phar->getPath() . '/';

foreach (new RecursiveIteratorIterator($phar) as $entry) {
    if (!$entry->isFile()) {
        continue;
    }

    $relativePath = str_replace('phar://', '', $entry->getPathname());
    $relativePath = str_replace($pharRootPrefix, '', $relativePath);

    $contents         = $entry->getContent();
    $updated_contents = $contents;
    foreach ($classPrefixes as $class) {
        $updated_contents = str_replace($class, $class . $autoloadSuffix, $updated_contents);
    }

    // Only rewrite entries that actually changed.
    if ($updated_contents !== $contents) {
        $phar[$relativePath] = $updated_contents;
    }
}

/**
 * Box, very annoyingly, force-adds a platform_check.php file
 * into the final built .phar archive. The vendor libraries
 * do work with PHP versions lower than the 8.1 enforced by that
 * platform_check.php file, so let's just truncate it.
 */
$phar['vendor/composer/platform_check.php'] = '';
$phar['.box/bin/check-requirements.php']    = '';
$phar->stopBuffering();
46+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"$schema": "https://raw.githubusercontent.com/box-project/box/refs/heads/main/res/schema.json",
3+
"main": "src/bootstrap.php",
4+
"output": "dist/data-liberation-markdown.phar",
5+
"force-autodiscovery": true,
6+
"compactors": [
7+
"KevinGH\\Box\\Compactor\\Php",
8+
"DataLiberationBoxCompactor"
9+
],
10+
"annotations": false,
11+
"directories": ["src/", "vendor/"]
12+
}

packages/playground/data-liberation-markdown/composer.json

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2+
"name": "wordpress/data-liberation-markdown",
23
"name": "wordpress/data-liberation-markdown",
34
"prefer-stable": true,
45
"require": {

packages/playground/data-liberation-markdown/dist/.gitkeep

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/bin/bash

# Builds the standalone dist/data-liberation-markdown.phar.gz file so that the
# Markdown importer can be downloaded only when it is needed.
#
# This is a temporary measure until we have a canonical way of distributing,
# versioning, and using the Data Liberation modules and their dependencies.
# Possible solutions might include composer packages, WordPress plugins, or
# tree-shaken zip files with each module and its composer deps.

set -e
echo "Building data liberation plugin"
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
DATA_LIBERATION_DIR="$SCRIPT_DIR"
BUILD_DIR="$DATA_LIBERATION_DIR/bin/build"
DIST_DIR="$DATA_LIBERATION_DIR/dist"

# Remove artifacts of a previous build; an already-empty dist/ is not an error.
rm "$DIST_DIR"/* > /dev/null 2>&1 || true

# `type -a box` prints lines such as "box is /path/to/box". Skip aliases and
# keep the path from the first remaining match only.
BOX_BASE_PATH="$(type -a box | grep -v 'alias' | awk '{print $3; exit}')"
if [ -z "$BOX_BASE_PATH" ]; then
	echo "Could not locate the box executable in PATH" >&2
	exit 1
fi
export BOX_BASE_PATH

php "$BUILD_DIR/box.php" compile -d "$DATA_LIBERATION_DIR" -c "$DATA_LIBERATION_DIR/box.json"
php -d 'phar.readonly=0' "$BUILD_DIR/truncate-composer-checks.php" "$DIST_DIR/data-liberation-markdown.phar"

# Smoke-test the built phar with the local PHP, then with PHP 8.0 through the php-wasm CLI.
php "$BUILD_DIR/smoke-test.php"
PHP=8.0 bun "$DATA_LIBERATION_DIR/../../php-wasm/cli/src/main.ts" "$BUILD_DIR/smoke-test.php"

cd "$DIST_DIR"
gzip data-liberation-markdown.phar
ls -sgh "$DIST_DIR"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
<ruleset name="WordPressStandard">
2+
<description>PHP 7.2 compatibility.</description>
3+
<config name="testVersion" value="7.2"/>
4+
<exclude-pattern>vendor/*</exclude-pattern>
5+
<rule ref="PHPCompatibility">
6+
<exclude name="PHPCompatibility.Keywords.ForbiddenNamesAsDeclared"/>
7+
</rule>
8+
<rule ref="WordPress-Core">
9+
<exclude name="Generic.Commenting.DocComment.MissingShort"/>
10+
<exclude name="Generic.PHP.DiscourageGoto.Found"/>
11+
<exclude name="Generic.CodeAnalysis.EmptyStatement.DetectedIf"/>
12+
<!-- Unused arguments are necessary when inheriting from classes and overriding methods. -->
13+
<exclude name="Generic.CodeAnalysis.UnusedFunctionParameter.Found"/>
14+
<exclude name="Squiz.PHP.NonExecutableCode.Unreachable"/>
15+
<exclude name="Squiz.Commenting.BlockComment.CloserSameLine"/>
16+
<exclude name="Squiz.Commenting.ClassComment.Missing"/>
17+
<exclude name="Squiz.Commenting.FileComment.WrongStyle"/>
18+
<exclude name="Squiz.Commenting.FileComment.Missing"/>
19+
<exclude name="Squiz.Commenting.FunctionComment.Missing"/>
20+
<exclude name="Squiz.Commenting.FunctionComment.MissingParamTag"/>
21+
<exclude name="Squiz.Commenting.FunctionComment.MissingParamType"/>
22+
<exclude name="Squiz.Commenting.FunctionComment.MissingParamComment"/>
23+
<exclude name="Squiz.Commenting.VariableComment.Missing"/>
24+
<exclude name="Squiz.PHP.CommentedOutCode.Found"/>
25+
<!-- "Parameter comment must end with a full stop" is such a pebble in the shoe. -->
26+
<exclude name="Squiz.Commenting.FunctionComment.ParamCommentFullStop"/>
27+
<exclude name="Squiz.PHP.DisallowSizeFunctionsInLoops.Found"/>
28+
<!-- Aligning the 1500 lines of public_suffix_list.php adds a lot of unnecessary noise and then
29+
the actual indentation is not even correct because the rule seems to count bytes, not printable
30+
UTF-8 characters. -->
31+
<exclude name="WordPress.Arrays.MultipleStatementAlignment.DoubleArrowNotAligned"/>
32+
<exclude name="WordPress.Files.FileName.InvalidClassFileName"/>
33+
<exclude name="WordPress.Files.FileName.NotHyphenatedLowercase"/>
34+
<exclude name="WordPress.PHP.YodaConditions.NotYoda"/>
35+
<exclude name="WordPress.Security.EscapeOutput.OutputNotEscaped"/>
36+
<exclude name="WordPress.WP.AlternativeFunctions"/>
37+
<exclude name="WordPress.WP.AlternativeFunctions.file_system_operations_fclose"/>
38+
</rule>
39+
</ruleset>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"name": "playground-data-liberation-markdown",
3+
"$schema": "../../../node_modules/nx/schemas/project-schema.json",
4+
"sourceRoot": "packages/playground/data-liberation-markdown",
5+
"projectType": "library",
6+
"targets": {
7+
"install": {
8+
"executor": "nx:run-commands",
9+
"options": {
10+
"cwd": "packages/playground/data-liberation-markdown",
11+
"commands": ["composer install"],
12+
"parallel": false
13+
}
14+
},
15+
"build:phar": {
16+
"executor": "nx:run-commands",
17+
"options": {
18+
"cwd": "packages/playground/data-liberation-markdown",
19+
"commands": ["bash ./phar-build.sh"],
20+
"parallel": false
21+
},
22+
"dependsOn": ["playground-data-liberation:build:phar"]
23+
},
24+
"lint:php": {
25+
"executor": "nx:run-commands",
26+
"options": {
27+
"cwd": "packages/playground/data-liberation-markdown",
28+
"commands": [
29+
"../data-liberation/vendor/bin/phpcs --standard=./phpcs.xml -s ./src ./*.php"
30+
],
31+
"parallel": false
32+
}
33+
},
34+
"lint:php:fix": {
35+
"executor": "nx:run-commands",
36+
"options": {
37+
"cwd": "packages/playground/data-liberation-markdown",
38+
"commands": [
39+
"../data-liberation/vendor/bin/phpcbf --standard=./phpcs.xml ./src"
40+
],
41+
"parallel": false
42+
}
43+
}
44+
}
45+
}

packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php

+4-4
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@ public static function create_for_markdown_directory( $markdown_directory, $opti
88
return WP_Markdown_Importer::create(
99
function ( $cursor = null ) use ( $markdown_directory ) {
1010
// @TODO: Handle $cursor
11-
return new WP_Directory_Tree_Entity_Reader(
11+
return new WP_Directory_Tree_Entity_Reader(
1212
new WP_Filesystem(),
13-
array (
13+
array(
1414
'root_dir' => $markdown_directory,
1515
'first_post_id' => 1,
1616
'allowed_extensions' => array( 'md' ),
1717
'index_file_patterns' => array( '#^index\.md$#' ),
18-
'markup_converter_factory' => function( $content ) {
18+
'markup_converter_factory' => function ( $content ) {
1919
return new WP_Markdown_To_Blocks( $content );
2020
},
2121
)
@@ -42,7 +42,7 @@ protected static function parse_options( $options ) {
4242
return false;
4343
}
4444
$options['local_markdown_assets_root'] = rtrim( $options['local_markdown_assets_root'], '/' );
45-
45+
4646
return parent::parse_options( $options );
4747
}
4848

packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php

+3-3
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,10 @@ private function convert_markdown_to_blocks() {
8080
$parser = new MarkdownParser( $environment );
8181

8282
$document = $parser->parse( $this->markdown );
83-
$this->frontmatter = [];
84-
foreach( $document->data as $key => $value ) {
83+
$this->frontmatter = array();
84+
foreach ( $document->data as $key => $value ) {
8585
// Use an array as a value to comply with the WP_Block_Markup_Converter interface.
86-
$this->frontmatter[ $key ] = [$value];
86+
$this->frontmatter[ $key ] = array( $value );
8787
}
8888

8989
$walker = $document->walker();

packages/playground/data-liberation-markdown/src/bootstrap.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
require_once __DIR__ . '/WP_Markdown_Importer.php';
44
require_once __DIR__ . '/WP_Markdown_To_Blocks.php';
55

6-
require_once __DIR__ . '/../vendor/autoload.php';
6+
require_once __DIR__ . '/../vendor/autoload.php';

packages/playground/data-liberation/bootstrap.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
require_once __DIR__ . '/src/wordpress-core-html-api/html5-named-character-references.php';
4545
}
4646

47+
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Converter.php';
4748
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Processor.php';
4849
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Url_Processor.php';
4950
require_once __DIR__ . '/src/block-markup/WP_URL_In_Text_Processor.php';
@@ -63,7 +64,6 @@
6364
require_once __DIR__ . '/src/import/WP_Stream_Importer.php';
6465
require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php';
6566
require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php';
66-
require_once __DIR__ . '/src/import/WP_Markdown_Importer.php';
6767

6868
require_once __DIR__ . '/src/utf8_decoder.php';
6969

Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?php
2+
3+
/**
 * Contract for objects that convert some source markup into block markup.
 *
 * NOTE(review): the descriptions below are inferred from the method names;
 * confirm them against the implementing classes.
 */
interface WP_Block_Markup_Converter {

	/**
	 * Runs the conversion.
	 */
	public function convert();

	/**
	 * Presumably returns the block markup produced by convert() — confirm.
	 */
	public function get_block_markup();

	/**
	 * Presumably returns every metadata entry found in the source — confirm.
	 */
	public function get_all_metadata();

	/**
	 * Presumably returns the metadata stored under a single key — confirm.
	 *
	 * @param mixed $key Metadata key to look up.
	 */
	public function get_meta_value( $key );
}

packages/playground/data-liberation/src/import/WP_Markdown_Importer.php

-75
This file was deleted.

0 commit comments

Comments
 (0)