6
6
7
7
use RedditImage \Exception \InvalidContentException ;
8
8
9
- class Content {
10
- private string $ content ;
9
+ class Content
10
+ {
11
11
private string $ preprocessed = '' ;
12
12
private string $ metadata = '' ;
13
13
private ?string $ contentLink = null ;
14
14
private ?string $ commentsLink = null ;
15
15
private string $ raw ;
16
16
private string $ real = '' ;
17
17
18
- public function __construct (string $ content ) {
19
- $ this -> content = $ content ;
18
+ public function __construct (string $ content )
19
+ {
20
20
$ this ->raw = $ content ;
21
21
22
22
$ this ->splitContent ($ content );
@@ -29,7 +29,8 @@ public function __construct(string $content) {
29
29
}
30
30
}
31
31
32
- private function isValid (): bool {
32
+ private function isValid (): bool
33
+ {
33
34
if ($ this ->metadata === '' ) {
34
35
return false ;
35
36
}
@@ -42,35 +43,43 @@ private function isValid(): bool {
42
43
return true ;
43
44
}
44
45
45
- public function getContentLink (): ?string {
46
+ public function getContentLink (): ?string
47
+ {
46
48
return $ this ->contentLink ;
47
49
}
48
50
49
- public function getCommentsLink (): ?string {
51
+ public function getCommentsLink (): ?string
52
+ {
50
53
return $ this ->commentsLink ;
51
54
}
52
55
53
- public function getPreprocessed (): string {
56
+ public function getPreprocessed (): string
57
+ {
54
58
return $ this ->preprocessed ;
55
59
}
56
60
57
- public function getMetadata (): string {
61
+ public function getMetadata (): string
62
+ {
58
63
return $ this ->metadata ;
59
64
}
60
65
61
- public function getRaw (): string {
66
+ public function getRaw (): string
67
+ {
62
68
return $ this ->raw ;
63
69
}
64
70
65
- public function getReal (): string {
71
+ public function getReal (): string
72
+ {
66
73
return $ this ->real ;
67
74
}
68
75
69
- public function hasBeenPreprocessed (): bool {
76
+ public function hasBeenPreprocessed (): bool
77
+ {
70
78
return '' !== $ this ->preprocessed ;
71
79
}
72
80
73
- public function hasReal (): bool {
81
+ public function hasReal (): bool
82
+ {
74
83
return '' !== $ this ->real ;
75
84
}
76
85
@@ -81,7 +90,8 @@ public function hasReal(): bool {
81
90
* fetch quickly. For instance when API calls are involved. Thus we need to
82
91
* separate the feed raw content from the preprocessed content.
83
92
*/
84
- private function splitContent (string $ content ): void {
93
+ private function splitContent (string $ content ): void
94
+ {
85
95
$ dom = new \DomDocument ('1.0 ' , 'UTF-8 ' );
86
96
$ dom ->loadHTML (
87
97
htmlspecialchars_decode (htmlentities (html_entity_decode ($ content ))),
@@ -91,10 +101,10 @@ private function splitContent(string $content): void {
91
101
$ xpath = new \DOMXpath ($ dom );
92
102
$ redditImage = $ xpath ->query ("//div[contains(@class,'reddit-image')] " );
93
103
94
- if (1 === $ redditImage ->length ) {
104
+ if ($ redditImage !== false && $ redditImage ->length === 1 ) {
95
105
$ node = $ redditImage ->item (0 );
96
- $ this ->preprocessed = $ dom ->saveHTML ($ node ->parentNode ->firstChild );
97
- $ this ->raw = $ dom ->saveHTML ($ node ->parentNode ->lastChild );
106
+ $ this ->preprocessed = $ dom ->saveHTML ($ node ->parentNode ->firstChild ) ?: '' ;
107
+ $ this ->raw = $ dom ->saveHTML ($ node ->parentNode ->lastChild ) ?: '' ;
98
108
}
99
109
}
100
110
@@ -106,7 +116,8 @@ private function splitContent(string $content): void {
106
116
* to the author page, the link to the current message, and the link to the
107
117
* current message comment section.
108
118
*/
109
- private function extractMetadata (): void {
119
+ private function extractMetadata (): void
120
+ {
110
121
if (preg_match ('#(?P<metadata>\s*?submitted.*</span>)# ' , $ this ->raw , $ matches )) {
111
122
$ this ->metadata = $ matches ['metadata ' ];
112
123
}
@@ -119,7 +130,8 @@ private function extractMetadata(): void {
119
130
* - content link.
120
131
* - comments link.
121
132
*/
122
- private function extractLinks (): void {
133
+ private function extractLinks (): void
134
+ {
123
135
$ dom = new \DomDocument ('1.0 ' , 'UTF-8 ' );
124
136
$ dom ->loadHTML (
125
137
htmlspecialchars_decode (htmlentities (html_entity_decode ($ this ->raw ))),
@@ -134,6 +146,7 @@ private function extractLinks(): void {
134
146
break ;
135
147
case '[comments] ' :
136
148
$ this ->commentsLink = $ link ->getAttribute ('href ' );
149
+ // no break
137
150
default :
138
151
break ;
139
152
}
@@ -147,7 +160,8 @@ private function extractLinks(): void {
147
160
* class attribute is sanitized to data-sanitized-class attribute when
148
161
* processed by SimplePie.
149
162
*/
150
- private function extractReal (): void {
163
+ private function extractReal (): void
164
+ {
151
165
$ dom = new \DomDocument ('1.0 ' , 'UTF-8 ' );
152
166
$ dom ->loadHTML (
153
167
htmlspecialchars_decode (htmlentities (html_entity_decode ($ this ->raw ))),
@@ -156,9 +170,9 @@ private function extractReal(): void {
156
170
157
171
$ xpath = new \DOMXpath ($ dom );
158
172
$ mdNode = $ xpath ->query ("//div[contains(@data-sanitized-class,'md')] " );
159
- if (1 === $ mdNode ->length ) {
173
+ if ($ mdNode !== false && $ mdNode ->length === 1 ) {
160
174
$ node = $ mdNode ->item (0 );
161
- $ this ->real = $ dom ->saveHTML ($ node );
175
+ $ this ->real = $ dom ->saveHTML ($ node ) ?: '' ;
162
176
}
163
177
}
164
178
}
0 commit comments