Skip to content

Commit 495be8b

Browse files
committed
Add support for categories
1 parent ca5eab7 commit 495be8b

File tree

4 files changed

+184
-53
lines changed

4 files changed

+184
-53
lines changed

packages/playground/data-liberation/src/import/WP_Topological_Sorter.php

+51-13
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@
99
*/
1010
class WP_Topological_Sorter {
1111

12-
public $posts = array();
13-
public $categories = array();
14-
public $category_index = array();
12+
public $posts = array();
13+
public $categories = array();
1514

1615
/**
1716
* Variable for keeping count of orphaned posts/attachments; it is also assigned as a temporary post ID.
@@ -50,9 +49,8 @@ public function map_category( $byte_offset, $data ) {
5049
}
5150

5251
$this->categories[ $data['slug'] ] = array(
53-
'parent' => $data['parent'],
54-
'byte_offset' => $byte_offset,
55-
'visited' => false,
52+
array_key_exists( 'parent', $data ) ? $data['parent'] : '',
53+
$byte_offset,
5654
);
5755
}
5856

@@ -85,8 +83,12 @@ public function map_post( $byte_offset, $data ) {
8583

8684
/**
8785
* Get the byte offset of a post, and remove it from the list.
86+
*
87+
* @param int $id The ID of the post to get the byte offset.
88+
*
89+
* @return int|bool The byte offset of the post, or false if the post is not found.
8890
*/
89-
public function get_byte_offset( $id ) {
91+
public function get_post_byte_offset( $id ) {
9092
if ( ! $this->sorted ) {
9193
return false;
9294
}
@@ -97,7 +99,7 @@ public function get_byte_offset( $id ) {
9799
// Remove the element from the array.
98100
unset( $this->posts[ $id ] );
99101

100-
if ( 0 === count( $this->posts ) ) {
102+
if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) {
101103
// All categories and posts have been processed.
102104
$this->reset();
103105
}
@@ -108,24 +110,52 @@ public function get_byte_offset( $id ) {
108110
return false;
109111
}
110112

113+
/**
114+
* Get the byte offset of a category, and remove it from the list.
115+
*
116+
* @param string $slug The slug of the category to get the byte offset.
117+
*
118+
* @return int|bool The byte offset of the category, or false if the category is not found.
119+
*/
120+
public function get_category_byte_offset( $slug ) {
121+
if ( ! $this->sorted ) {
122+
return false;
123+
}
124+
125+
if ( isset( $this->categories[ $slug ] ) ) {
126+
$ret = $this->categories[ $slug ];
127+
128+
// Remove the element from the array.
129+
unset( $this->categories[ $slug ] );
130+
131+
if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) {
132+
// All categories and posts have been processed.
133+
$this->reset();
134+
}
135+
136+
return $ret;
137+
}
138+
139+
return false;
140+
}
141+
111142
public function is_sorted() {
112143
return $this->sorted;
113144
}
114145

115146
/**
116-
* Sort posts topologically.
147+
* Sort elements topologically.
117148
*
118-
* Children posts should not be processed before their parent has been processed.
119-
* This method sorts the posts in the order they should be processed.
120-
*
121-
* Sorted posts will be stored as attachments and posts/pages separately.
149+
* Elements should not be processed before their parent has been processed.
150+
* This method sorts the elements in the order they should be processed.
122151
*/
123152
public function sort_topologically( $free_space = true ) {
124153
foreach ( $this->categories as $slug => $category ) {
125154
$this->topological_category_sort( $slug, $category );
126155
}
127156

128157
$this->sort_elements( $this->posts );
158+
$this->sort_elements( $this->categories );
129159

130160
// Free some space.
131161
if ( $free_space ) {
@@ -136,6 +166,14 @@ public function sort_topologically( $free_space = true ) {
136166
// Save only the byte offset.
137167
$this->posts[ $id ] = $element[1];
138168
}
169+
170+
/**
171+
* @TODO: all the elements that have not been moved can be flushed away.
172+
*/
173+
foreach ( $this->categories as $slug => $element ) {
174+
// Save only the byte offset.
175+
$this->categories[ $slug ] = $element[1];
176+
}
139177
}
140178

141179
$this->sorted = true;

packages/playground/data-liberation/tests/WPStreamImporterTests.php

+43-32
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,23 @@ protected function setUp(): void {
1515
}
1616
}
1717

18-
/**
19-
* @before
18+
/**
19+
* @before
2020
*
2121
* TODO: Run each test in a fresh Playground instance instead of sharing the global
2222
* state like this.
23-
*/
24-
public function clean_up_uploads(): void
25-
{
26-
$files = glob( '/wordpress/wp-content/uploads/*' );
27-
foreach( $files as $file ) {
28-
if( is_dir( $file ) ) {
29-
array_map( 'unlink', glob( "$file/*.*" ) );
30-
rmdir( $file );
31-
} else {
32-
unlink( $file );
33-
}
34-
}
35-
}
23+
*/
24+
public function clean_up_uploads(): void {
25+
$files = glob( '/wordpress/wp-content/uploads/*' );
26+
foreach ( $files as $file ) {
27+
if ( is_dir( $file ) ) {
28+
array_map( 'unlink', glob( "$file/*.*" ) );
29+
rmdir( $file );
30+
} else {
31+
unlink( $file );
32+
}
33+
}
34+
}
3635

3736
public function test_import_simple_wxr() {
3837
$import = data_liberation_import( __DIR__ . '/wxr/small-export.xml' );
@@ -44,7 +43,7 @@ public function test_frontloading() {
4443
$wxr_path = __DIR__ . '/wxr/frontloading-1-attachment.xml';
4544
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path );
4645
$this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS );
47-
while( $importer->next_step() ) {
46+
while ( $importer->next_step() ) {
4847
// noop
4948
}
5049
$files = glob( '/wordpress/wp-content/uploads/*' );
@@ -57,17 +56,17 @@ public function test_resume_frontloading() {
5756
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path );
5857
$this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS );
5958

60-
$progress_url = null;
59+
$progress_url = null;
6160
$progress_value = null;
62-
for($i = 0; $i < 20; ++$i) {
61+
for ( $i = 0; $i < 20; ++$i ) {
6362
$importer->next_step();
6463
$progress = $importer->get_frontloading_progress();
65-
if( count( $progress ) === 0 ) {
64+
if ( count( $progress ) === 0 ) {
6665
continue;
6766
}
68-
$progress_url = array_keys( $progress )[0];
67+
$progress_url = array_keys( $progress )[0];
6968
$progress_value = array_values( $progress )[0];
70-
if( null === $progress_value['received'] ) {
69+
if ( null === $progress_value['received'] ) {
7170
continue;
7271
}
7372
break;
@@ -78,22 +77,22 @@ public function test_resume_frontloading() {
7877
$this->assertEquals( 'https://wpthemetestdata.files.wordpress.com/2008/06/canola2.jpg', $progress_url );
7978
$this->assertGreaterThan( 0, $progress_value['total'] );
8079

81-
$cursor = $importer->get_reentrancy_cursor();
82-
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, [], $cursor );
80+
$cursor = $importer->get_reentrancy_cursor();
81+
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, array(), $cursor );
8382
// Rewind back to the entity we were on.
8483
$this->assertTrue( $importer->next_step() );
8584

8685
// Restart the download of the same entity – from scratch.
87-
$progress_value = [];
88-
for($i = 0; $i < 20; ++$i) {
86+
$progress_value = array();
87+
for ( $i = 0; $i < 20; ++$i ) {
8988
$importer->next_step();
9089
$progress = $importer->get_frontloading_progress();
91-
if( count( $progress ) === 0 ) {
90+
if ( count( $progress ) === 0 ) {
9291
continue;
9392
}
94-
$progress_url = array_keys( $progress )[0];
93+
$progress_url = array_keys( $progress )[0];
9594
$progress_value = array_values( $progress )[0];
96-
if( null === $progress_value['received'] ) {
95+
if ( null === $progress_value['received'] ) {
9796
continue;
9897
}
9998
break;
@@ -105,17 +104,17 @@ public function test_resume_frontloading() {
105104
}
106105

107106
/**
108-
*
107+
* Test resume entity import.
109108
*/
110109
public function test_resume_entity_import() {
111110
$wxr_path = __DIR__ . '/wxr/entities-options-and-posts.xml';
112111
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path );
113112
$this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_IMPORT_ENTITIES );
114113

115-
for($i = 0; $i < 11; ++$i) {
114+
for ( $i = 0; $i < 11; ++$i ) {
116115
$this->assertTrue( $importer->next_step() );
117-
$cursor = $importer->get_reentrancy_cursor();
118-
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, [], $cursor );
116+
$cursor = $importer->get_reentrancy_cursor();
117+
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, array(), $cursor );
119118
// Rewind back to the entity we were on.
120119
// Note this means we may attempt to insert it twice. It's
121120
// the importer's job to detect that and skip the duplicate
@@ -125,6 +124,18 @@ public function test_resume_entity_import() {
125124
$this->assertFalse( $importer->next_step() );
126125
}
127126

127+
public function test_sort_categories() {
128+
$wxr_path = __DIR__ . '/wxr/mixed-categories.xml';
129+
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path );
130+
$this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_TOPOLOGICAL_SORT );
131+
132+
while ( $importer->next_step() ) {
133+
if ( $importer->get_next_stage() === WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS ) {
134+
break;
135+
}
136+
}
137+
}
138+
128139
private function skip_to_stage( WP_Stream_Importer $importer, string $stage ) {
129140
do {
130141
while ( $importer->next_step() ) {

packages/playground/data-liberation/tests/WPTopologicalSorterTests.php

+8-8
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ public function test_parent_after_child() {
2323
$sorter->sort_topologically();
2424

2525
$this->assertEquals( array( 2 => 20, 1 => 10 ), $sorter->posts );
26-
$this->assertEquals( 10, $sorter->get_byte_offset( 1 ) );
27-
$this->assertEquals( 20, $sorter->get_byte_offset( 2 ) );
26+
$this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) );
27+
$this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) );
2828
$this->assertFalse( $sorter->is_sorted() );
2929
}
3030

@@ -37,7 +37,7 @@ public function test_child_after_parent() {
3737
$sorter->sort_topologically();
3838

3939
$this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts );
40-
$this->assertEquals( 10, $sorter->get_byte_offset( 1 ) );
40+
$this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) );
4141
}
4242

4343
public function test_orphaned_post() {
@@ -48,8 +48,8 @@ public function test_orphaned_post() {
4848
$sorter->sort_topologically();
4949

5050
$this->assertEquals( array( 1 => 10, 2 => 20 ), $sorter->posts );
51-
$this->assertEquals( 10, $sorter->get_byte_offset( 1 ) );
52-
$this->assertEquals( 20, $sorter->get_byte_offset( 2 ) );
51+
$this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) );
52+
$this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) );
5353
}
5454

5555
public function test_chain_parent_child_after() {
@@ -80,9 +80,9 @@ public function test_get_byte_offsets_consume_array() {
8080

8181
$this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts );
8282

83-
$this->assertEquals( 10, $sorter->get_byte_offset( 1 ) );
84-
$this->assertEquals( 20, $sorter->get_byte_offset( 2 ) );
85-
$this->assertEquals( 30, $sorter->get_byte_offset( 3 ) );
83+
$this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) );
84+
$this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) );
85+
$this->assertEquals( 30, $sorter->get_post_byte_offset( 3 ) );
8686
$this->assertCount( 0, $sorter->posts );
8787
}
8888

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
<?xml version="1.0" encoding="UTF-8" ?>
2+
<rss version="2.0"
3+
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
4+
xmlns:content="http://purl.org/rss/1.0/modules/content/"
5+
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
6+
xmlns:dc="http://purl.org/dc/elements/1.1/"
7+
xmlns:wp="http://wordpress.org/export/1.2/"
8+
>
9+
10+
<channel>
11+
<title>Mixed Categories</title>
12+
<link>https://playground.wordpress.net/scope:funny-chic-valley</link>
13+
<description></description>
14+
<pubDate>Fri, 29 Nov 2024 12:36:23 +0000</pubDate>
15+
<language>en-US</language>
16+
<wp:wxr_version>1.2</wp:wxr_version>
17+
<wp:base_site_url>https://playground.wordpress.net/scope:funny-chic-valley</wp:base_site_url>
18+
<wp:base_blog_url>https://playground.wordpress.net/scope:funny-chic-valley</wp:base_blog_url>
19+
20+
<wp:author>
21+
<wp:author_id>1</wp:author_id>
22+
<wp:author_login><![CDATA[admin]]></wp:author_login>
23+
<wp:author_email><![CDATA[[email protected]]]></wp:author_email>
24+
<wp:author_display_name><![CDATA[admin]]></wp:author_display_name>
25+
<wp:author_first_name><![CDATA[]]></wp:author_first_name>
26+
<wp:author_last_name><![CDATA[]]></wp:author_last_name>
27+
</wp:author>
28+
29+
<wp:category>
30+
<wp:term_id>5</wp:term_id>
31+
<wp:category_nicename><![CDATA[parent-of-previous-category]]></wp:category_nicename>
32+
<wp:category_parent><![CDATA[]]></wp:category_parent>
33+
<wp:cat_name><![CDATA[Parent of previous category]]></wp:cat_name>
34+
</wp:category>
35+
<wp:category>
36+
<wp:term_id>1</wp:term_id>
37+
<wp:category_nicename><![CDATA[uncategorized]]></wp:category_nicename>
38+
<wp:category_parent><![CDATA[]]></wp:category_parent>
39+
<wp:cat_name><![CDATA[Test #1]]></wp:cat_name>
40+
</wp:category>
41+
<wp:category>
42+
<wp:term_id>3</wp:term_id>
43+
<wp:category_nicename><![CDATA[test-3]]></wp:category_nicename>
44+
<wp:category_parent><![CDATA[uncategorized]]></wp:category_parent>
45+
<wp:cat_name><![CDATA[Test #3]]></wp:cat_name>
46+
</wp:category>
47+
<wp:category>
48+
<wp:term_id>2</wp:term_id>
49+
<wp:category_nicename><![CDATA[test-child]]></wp:category_nicename>
50+
<wp:category_parent><![CDATA[parent-of-previous-category]]></wp:category_parent>
51+
<wp:cat_name><![CDATA[Test child]]></wp:cat_name>
52+
</wp:category>
53+
<wp:term>
54+
<wp:term_id>5</wp:term_id>
55+
<wp:term_taxonomy><![CDATA[category]]></wp:term_taxonomy>
56+
<wp:term_slug><![CDATA[parent-of-previous-category]]></wp:term_slug>
57+
<wp:term_parent><![CDATA[]]></wp:term_parent>
58+
<wp:term_name><![CDATA[Parent of previous category]]></wp:term_name>
59+
</wp:term>
60+
<wp:term>
61+
<wp:term_id>1</wp:term_id>
62+
<wp:term_taxonomy><![CDATA[category]]></wp:term_taxonomy>
63+
<wp:term_slug><![CDATA[uncategorized]]></wp:term_slug>
64+
<wp:term_parent><![CDATA[]]></wp:term_parent>
65+
<wp:term_name><![CDATA[Test #1]]></wp:term_name>
66+
</wp:term>
67+
<wp:term>
68+
<wp:term_id>3</wp:term_id>
69+
<wp:term_taxonomy><![CDATA[category]]></wp:term_taxonomy>
70+
<wp:term_slug><![CDATA[test-3]]></wp:term_slug>
71+
<wp:term_parent><![CDATA[uncategorized]]></wp:term_parent>
72+
<wp:term_name><![CDATA[Test #3]]></wp:term_name>
73+
</wp:term>
74+
<wp:term>
75+
<wp:term_id>2</wp:term_id>
76+
<wp:term_taxonomy><![CDATA[category]]></wp:term_taxonomy>
77+
<wp:term_slug><![CDATA[test-child]]></wp:term_slug>
78+
<wp:term_parent><![CDATA[parent-of-previous-category]]></wp:term_parent>
79+
<wp:term_name><![CDATA[Test child]]></wp:term_name>
80+
</wp:term>
81+
</channel>
82+
</rss>

0 commit comments

Comments
 (0)