-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathElementIndexerService.php
executable file
·311 lines (270 loc) · 11.4 KB
/
ElementIndexerService.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
<?php
/**
* Elasticsearch plugin for Craft CMS 3.x
*
* Bring the power of Elasticsearch to your Craft 3 CMS project
*
* @link https://www.lahautesociete.com
* @copyright Copyright (c) 2018 La Haute Société
*/
namespace lhs\elasticsearch\services;
use Craft;
use craft\base\Component;
use craft\base\Element;
use craft\commerce\elements\Product;
use craft\digitalproducts\elements\Product as DigitalProduct;
use craft\elements\Asset;
use craft\elements\Entry;
use craft\errors\SiteNotFoundException;
use craft\helpers\Db;
use craft\helpers\UrlHelper;
use lhs\elasticsearch\Elasticsearch as ElasticsearchPlugin;
use lhs\elasticsearch\exceptions\IndexElementException;
use lhs\elasticsearch\records\ElasticsearchRecord;
/**
 * Service responsible for pushing Craft elements into the Elasticsearch index
 * and for removing them from it.
 *
 * Only entries, assets, Commerce products and Digital Products are indexed
 * (see `getReasonForNotReindexing()`). The indexed content is either produced
 * by the `elementContentCallback` setting or, by default, fetched over HTTP
 * from the element's own URL using a preview token.
 */
class ElementIndexerService extends Component
{
    /** @var ElasticsearchPlugin The plugin instance, resolved in `init()` */
    public $plugin;

    /**
     * Stores a reference to the plugin instance so its settings can be read later.
     */
    public function init(): void
    {
        parent::init();
        $this->plugin = ElasticsearchPlugin::getInstance();
    }

    /**
     * Index the given `$element` into Elasticsearch
     * @param Element $element
     * @return string|null A string explaining why the element wasn't reindexed or `null` if it was reindexed
     * @throws \yii\elasticsearch\Exception If an error occurs while saving the record to the Elasticsearch server
     * @throws \GuzzleHttp\Exception\GuzzleException
     * @throws IndexElementException If an error occurs while getting the indexable content of the element. Check the previous property of the exception for more details
     * @throws \yii\base\InvalidConfigException
     * @throws \yii\db\Exception
     * @throws \yii\db\StaleObjectException
     */
    public function indexElement(Element $element): ?string
    {
        $reason = $this->getReasonForNotReindexing($element);
        if ($reason !== null) {
            return $reason;
        }

        Craft::info("Indexing entry {$element->url}", __METHOD__);

        // Assets are indexed with their creation date as post date and never expire.
        $postDate = $element instanceof Asset ? $element->dateCreated : $element->postDate;
        $expiryDate = $element instanceof Asset ? null : $element->expiryDate;

        $esRecord = $this->getElasticRecordForElement($element);
        //@formatter:off
        $esRecord->title         = $element->title;
        $esRecord->url           = $element->url;
        $esRecord->postDate      = $postDate ? Db::prepareDateForDb($postDate) : null;
        $esRecord->noPostDate    = !$postDate;
        $esRecord->expiryDate    = $expiryDate ? Db::prepareDateForDb($expiryDate) : null;
        $esRecord->noExpiryDate  = !$expiryDate;
        $esRecord->elementHandle = $element->refHandle();
        //@formatter:on

        $content = $this->getElementContent($element);
        if ($content === false) {
            $message = "Not indexing element #{$element->id} since it doesn't have a template.";
            Craft::debug($message, __METHOD__);
            return $message;
        }

        // The content is stored base64-encoded in the record.
        $esRecord->content = base64_encode(trim($content));

        if (!$esRecord->save()) {
            throw new \yii\elasticsearch\Exception('Could not save elasticsearch record');
        }

        return null;
    }

    /**
     * Removes an element from the Elasticsearch index
     * @param Element $element The element to delete
     * @return int The number of rows deleted
     * @throws \yii\elasticsearch\Exception If the entry to be deleted cannot be found
     */
    public function deleteElement(Element $element): int
    {
        Craft::info("Deleting entry #{$element->id}: {$element->url}", __METHOD__);

        // The record class is scoped to a site through this static property.
        ElasticsearchRecord::$siteId = $element->siteId;

        return ElasticsearchRecord::deleteAll(['_id' => $element->id]);
    }

    /**
     * Get the reason why an element should NOT be reindexed.
     *
     * The checks are performed in this order: element type, enabled for site,
     * has content, has a URL, blacklisted entry type, blacklisted asset volume.
     * Each rejection is also written to the debug log.
     *
     * @param Element $element The element to consider for reindexing
     * @return string|null A string explaining why the element must not be reindexed or `null` if it can be reindexed
     * @throws \yii\base\InvalidConfigException
     */
    protected function getReasonForNotReindexing(Element $element): ?string
    {
        $isSupportedType = $element instanceof Entry
            || $element instanceof Product
            || $element instanceof DigitalProduct
            || $element instanceof Asset;

        if (!$isSupportedType) {
            $message = "Not indexing entry #{$element->id} since it is not an entry, an asset, a product or a digital product.";
            Craft::debug($message, __METHOD__);
            return $message;
        }

        if (!$element->enabledForSite) {
            $sitesService = Craft::$app->getSites();
            try {
                $currentSiteId = $sitesService->getCurrentSite()->id;
                $message = "Not indexing entry #{$element->id} since it is not enabled for the current site (#{$currentSiteId}).";
                Craft::debug($message, __METHOD__);
                return $message;
            } catch (SiteNotFoundException $e) {
                $message = "Not indexing entry #{$element->id} since there are no sites (therefore it can't be enabled for any site).";
                Craft::debug($message, __METHOD__);
                return $message;
            }
        }

        if (!$element->hasContent()) {
            $message = "Not indexing entry #{$element->id} since it has no content.";
            Craft::debug($message, __METHOD__);
            return $message;
        }

        if (!$element->getUrl()) {
            $message = "Not indexing entry #{$element->id} since it has no URL.";
            Craft::debug($message, __METHOD__);
            return $message;
        }

        if ($element instanceof Entry) {
            $blacklist = $this->plugin->getSettings()->blacklistedEntryTypes;
            if (in_array($element->type->handle, $blacklist, true)) {
                $message = "Not indexing entry #{$element->id} since it's in a blacklisted entry types.";
                Craft::debug($message, __METHOD__);
                return $message;
            }
        }

        if ($element instanceof Asset) {
            $blacklist = $this->plugin->getSettings()->blacklistedAssetVolumes;
            if (in_array($element->getVolume()->handle, $blacklist, true)) {
                $message = "Not indexing asset #{$element->id} since it's in a blacklisted asset volume.";
                Craft::debug($message, __METHOD__);
                return $message;
            }
        }

        return null;
    }

    /**
     * Get the content to index for the given element.
     *
     * When the `elementContentCallback` setting is set, its return value is
     * used as is; otherwise the content is obtained through
     * `getElementIndexableContent()`.
     *
     * @param Element $element
     * @return bool|string The content to index, or `false` if the element is not indexable
     * @throws \GuzzleHttp\Exception\GuzzleException
     * @throws IndexElementException
     */
    protected function getElementContent(Element $element)
    {
        $callback = $this->plugin->getSettings()->elementContentCallback;
        if ($callback) {
            return $callback($element);
        }

        return $this->getElementIndexableContent($element);
    }

    /**
     * Get an element page content using Guzzle
     *
     * Assets are handled separately: their file contents are returned directly.
     * For every other element, a preview token is generated so that the page
     * can be requested even when the element is pending.
     *
     * @param Element $element
     * @return bool|string The indexable content of the element or `false` if it could not be obtained
     *                     (asset contents unreadable, or the page did not answer with a 200 status)
     * @throws IndexElementException If anything goes wrong. Check the previous property of the exception to get more details
     * @throws \GuzzleHttp\Exception\GuzzleException
     */
    protected function getElementIndexableContent(Element $element)
    {
        Craft::debug('Getting element page content : ' . $element->url, __METHOD__);

        // Special treatment for assets
        if ($element instanceof Asset) {
            try {
                return $element->getContents();
            } catch (\Throwable $e) {
                // Best effort: the asset is skipped, but the cause is kept in the logs.
                Craft::warning(
                    "Could not read the contents of asset #{$element->id}: {$e->getMessage()}",
                    __METHOD__
                );
                return false;
            }
        }

        // Generate the sharable url based on a freshly generated token
        $token = $this->createPreviewToken($element);
        $url = UrlHelper::urlWithToken($element->getUrl(), $token);

        // Request the page content with GuzzleHttp\Client
        // NOTE(review): only the connection phase is bounded here; no overall `timeout` option is set.
        $client = new \GuzzleHttp\Client(['connect_timeout' => 10]);
        try {
            $res = $client->request('GET', $url);
            if ($res->getStatusCode() === 200) {
                // The body is a stream object: convert it explicitly to the string expected by the extractor.
                return $this->extractIndexablePart((string) $res->getBody());
            }
        } catch (\GuzzleHttp\Exception\RequestException $e) {
            Craft::error('Could not get element content: ' . $e->getMessage(), __METHOD__);
            throw new IndexElementException($e->getMessage(), 0, $e);
        } catch (\Exception $e) {
            throw new IndexElementException(
                Craft::t(
                    ElasticsearchPlugin::PLUGIN_HANDLE,
                    'An error occurred while parsing the element page content: {previousExceptionMessage}',
                    ['previousExceptionMessage' => $e->getMessage()]
                ), 0, $e
            );
        }

        return false;
    }

    /**
     * Create the token used to request the sharable/preview URL of an element.
     *
     * The token route depends on the element type and on the installed Craft
     * schema version: Commerce products use the shared product preview route,
     * other elements use `preview/preview` from schema 3.2.0 onwards and
     * `entries/view-shared-entry` before that.
     *
     * @param Element $element
     * @return string The generated token
     * @throws IndexElementException If the token could not be created
     */
    protected function createPreviewToken(Element $element): string
    {
        /** @noinspection PhpUndefinedClassInspection */
        if ($element instanceof Product) {
            $route = [
                'commerce/products-preview/view-shared-product',
                ['productId' => $element->id, 'siteId' => $element->siteId],
            ];
        } elseif (version_compare(Craft::$app->getInstalledSchemaVersion(), '3.2.0', '>=')) {
            $route = [
                'preview/preview',
                [
                    'elementType' => get_class($element),
                    'canonicalId' => $element->canonicalId,
                    'sourceId'    => $element->id,
                    'siteId'      => $element->siteId,
                    'draftId'     => null,
                    'revisionId'  => null,
                ],
            ];
        } else {
            $route = [
                'entries/view-shared-entry',
                ['entryId' => $element->id, 'siteId' => $element->siteId],
            ];
        }

        $token = Craft::$app->getTokens()->createToken($route);

        // createToken() is documented by Craft as returning `false` on failure;
        // without this guard an unusable token would end up in the requested URL.
        if (!is_string($token) || $token === '') {
            throw new IndexElementException("Could not create a preview token for element #{$element->id}.");
        }

        return $token;
    }

    /**
     * Reduce a page's HTML to the part that should be indexed.
     *
     * The `contentExtractorCallback` setting, when set, is applied first. Then,
     * if the result contains the `BEGIN`/`END elasticsearch indexed content`
     * comment markers, only what lies between them is kept (prefixed with a
     * doctype). The match is greedy: it spans from the first `BEGIN` marker to
     * the last `END` marker.
     *
     * @param string $html The full page HTML
     * @return string The trimmed indexable HTML
     */
    protected function extractIndexablePart(string $html): string
    {
        $callback = $this->plugin->getSettings()->contentExtractorCallback;
        if ($callback) {
            $html = $callback($html);
        }

        if (preg_match('/<!-- BEGIN elasticsearch indexed content -->(.*)<!-- END elasticsearch indexed content -->/s', $html, $body)) {
            $html = '<!DOCTYPE html>' . trim($body[1]);
        }

        return trim($html);
    }

    /**
     * Find the Elasticsearch record of the given element, or create a new one
     * identified by the element's id when none exists yet.
     *
     * @param Element $element
     * @return ElasticsearchRecord The record, with the element attached to it
     */
    protected function getElasticRecordForElement(Element $element): ElasticsearchRecord
    {
        // The record class is scoped to a site through this static property.
        ElasticsearchRecord::$siteId = $element->siteId;

        /** @var ElasticsearchRecord|null $esRecord */
        $esRecord = ElasticsearchRecord::findOne($element->id);

        if ($esRecord === null) {
            $esRecord = new ElasticsearchRecord();
            // NOTE(review): presumably redundant with the assignment above — confirm before removing.
            ElasticsearchRecord::$siteId = $element->siteId;
            $esRecord->set_id($element->id);
        }

        $esRecord->setElement($element);

        return $esRecord;
    }
}