|
| 1 | +<?php |
| 2 | +declare(strict_types=1); |
| 3 | + |
| 4 | +/** |
| 5 | + * Add or Edit Joomla! Articles Via API Using Streamed CSV |
| 6 | + * - When id = 0 in the csv, it does a POST. If the alias already exists, it appends a random suffix to your alias and does the POST again |
| 7 | + * - When id > 0 in the csv, it does a PATCH. If the alias already exists, it appends a random suffix to your alias and does the PATCH again |
| 8 | + * - Now supports: |
| 9 | + * - subform custom fields in article |
| 10 | + * - images: intro / fulltext images in article |
| 11 | + * - urls: urla,urlb,urlc in article |
| 12 | + * @author Alexandre ELISÉ <[email protected]> |
| 13 | + * @copyright (c) 2009 - present. Alexandre ELISÉ. All rights reserved. |
| 14 | + * @license GPL-2.0-and-later GNU General Public License v2.0 or later |
| 15 | + * @link https://alexandree.io |
| 16 | + */ |
| 17 | + |
// Public url of the sample csv used in this example (CHANGE WITH YOUR OWN CSV URL IF YOU WISH)
$csvUrl = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vTO8DC8xzEEyP754B0kBu1sa2P9Rn3I8OLmq_RJYHwOwTlY8OGvpjp1yvaE84Imj0HYQeJcNKT2TOFR/pub?output=csv';

// Your Joomla! 4.x website base url
$baseUrl = '';
// Your Joomla! 4.x Api Token (DO NOT STORE IT IN YOUR REPO; USE A VAULT OR A PASSWORD MANAGER)
$token = '';
// Path of the Web Services Api, relative to the base url
$basePath = 'api/index.php/v1';

// Request timeout in seconds (handed to CURLOPT_TIMEOUT)
$timeout = 10;

// Add custom fields support (shout-out to Marc DECHÈVRE : CUSTOM KING)
// The keys are the columns in the csv with the custom fields names (that's how Joomla! Web Services Api work as of today)
// For the custom fields to work they need to be added in the csv and to exist in the Joomla! site.
// Keep the list empty when there are none: an empty-string entry would make any unnamed csv column
// be treated as a custom field and sent to the Api with the key "".
$customFieldKeys = []; // e.g. ['with-coffee', 'with-dessert', 'extra-water-bottle']
| 35 | + |
| 36 | + |
// This time we need endpoint to be a function to make it more dynamic.
//
// Builds the url of the articles collection, or of a single article when a resource id is given:
//   - $givenBaseUrl    : site base url, with or without a trailing slash
//   - $givenBasePath   : Api path relative to the base url, with or without surrounding slashes
//   - $givenResourceId : article id; 0 (the default) targets the collection
// Returns the url as a string.
$endpoint = function (string $givenBaseUrl, string $givenBasePath, int $givenResourceId = 0): string {
    // Normalise the slashes so 'https://example.org/' combined with '/api/index.php/v1/'
    // does not end up with '//' inside the url.
    $collectionUrl = sprintf(
        '%s/%s/%s',
        rtrim($givenBaseUrl, '/'),
        trim($givenBasePath, '/'),
        'content/articles'
    );

    return $givenResourceId ? sprintf('%s/%d', $collectionUrl, $givenResourceId) : $collectionUrl;
};
| 42 | + |
// PHP Generator to efficiently read the csv file, one row at a time.
//
//   - $url  : location of the csv (anything fopen() can read); its first line is the header
//   - $keys : extra column names (custom fields) to keep in addition to the default article columns
//
// For every data row it yields either:
//   - a JSON string: an object mapping each kept column name to the raw cell value, or
//   - a RuntimeException instance describing why the row (or the whole file) could not be used.
// Consumers are expected to skip the values that are not strings.
// A fatal problem (empty url, unreadable file, missing header) yields one exception and ends the generator.
$generator = function (string $url, array $keys = []): Generator {
    if ($url === '')
    {
        yield new RuntimeException('Url MUST NOT be empty', 422);

        return;
    }

    $defaultKeys = [
        'id',
        'title',
        'alias',
        'catid',
        'articletext',
        'introtext',
        'fulltext',
        'language',
        'metadesc',
        'metakey',
        'state',
        'featured',
        'publish_up',
        'publish_down',
        'featured_up',
        'featured_down',
        'images',
        'urls',
    ];

    $mergedKeys = array_unique(array_merge($defaultKeys, $keys));

    $resource = fopen($url, 'r');

    if ($resource === false)
    {
        yield new RuntimeException('Could not read csv file', 500);

        // Nothing to read and nothing to close
        return;
    }

    try
    {
        // The stream stays in blocking mode and is parsed with fgetcsv(): a non-blocking read can
        // return nothing (or half a line) while the data is still in transit, and splitting on
        // "\r\n" by hand breaks on "\n" line endings and on quoted cells that contain line breaks.
        // Delimiter, enclosure and escape are spelled out to keep the defaults str_getcsv() used.
        $csvHeaderKeys = fgetcsv($resource, 0, ',', '"', '\\');

        // fgetcsv() returns false at end of file and [null] for a blank line
        if ($csvHeaderKeys === false || $csvHeaderKeys === [null])
        {
            yield new RuntimeException('First line MUST NOT be empty. It is the header', 422);

            return;
        }

        // array_intersect() preserves the column index of every kept header,
        // which allows using csv keys in any order.
        $commonKeys = array_intersect($csvHeaderKeys, $mergedKeys);

        while (($extractedContent = fgetcsv($resource, 0, ',', '"', '\\')) !== false)
        {
            if ($extractedContent === [null])
            {
                yield new RuntimeException('Current line MUST NOT be empty', 422);

                continue;
            }

            $article    = [];
            $isComplete = true;

            foreach ($commonKeys as $columnIndex => $columnName)
            {
                // A row shorter than the header cannot be mapped reliably
                if (!isset($extractedContent[$columnIndex]))
                {
                    $isComplete = false;
                    break;
                }

                // Cell values are forwarded untouched (JSON cells such as images/urls stay JSON strings)
                $article[$columnName] = $extractedContent[$columnIndex];

                echo 'current item key: ' . $columnIndex . ' with value ' . $extractedContent[$columnIndex] . PHP_EOL;
            }

            // json_encode() returns false on failure, e.g. malformed UTF-8 in a cell
            $encodedContent = $isComplete ? json_encode($article) : false;

            if ($encodedContent === false)
            {
                yield new RuntimeException('Current line seem to be invalid', 422);

                continue;
            }

            yield $encodedContent;
        }
    }
    finally
    {
        fclose($resource);
    }
};
| 150 | + |
// Process data returned by the PHP Generator: send one JSON payload to the Api and return the raw response body.
//
//   - $givenHttpVerb : HTTP method to use (the script passes 'POST' or 'PATCH')
//   - $endpoint      : full url of the request
//   - $dataString    : request body
//   - $headers       : HTTP request headers, one 'Name: value' string per entry
//   - $timeout       : maximum duration of the request, in seconds
//   - $transport     : cURL handle created by curl_init()
//
// Throws a RuntimeException when the transfer fails or when the response body is empty.
$process = function (string $givenHttpVerb, string $endpoint, string $dataString, array $headers, int $timeout, $transport): string {
    curl_setopt_array($transport, [
            CURLOPT_URL            => $endpoint,
            CURLOPT_RETURNTRANSFER => true,
            // This option sets the Accept-Encoding header (gzip, deflate, ...), not a charset.
            // The empty string advertises every content encoding supported by the cURL build.
            CURLOPT_ENCODING       => '',
            CURLOPT_MAXREDIRS      => 10,
            CURLOPT_TIMEOUT        => $timeout,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTP_VERSION   => CURL_HTTP_VERSION_2TLS,
            CURLOPT_CUSTOMREQUEST  => $givenHttpVerb,
            CURLOPT_POSTFIELDS     => $dataString,
            CURLOPT_HTTPHEADER     => $headers,
        ]
    );

    $response = curl_exec($transport);

    // curl_exec() returns false when the transfer itself failed: report the real reason
    if ($response === false)
    {
        throw new RuntimeException(sprintf('Request failed: %s', curl_error($transport)), 422);
    }

    // The caller catches the exception, so the import continues even on partial failure
    if (empty($response))
    {
        throw new RuntimeException('Empty output', 422);
    }

    return $response;
};
// Read the CSV through the PHP Generator and send one request per valid row.
// Rows rejected by the Api are remembered in $storage for the retry pass.
$streamCsv = $generator($csvUrl, $customFieldKeys);
$storage   = [];

foreach ($streamCsv as $dataKey => $dataString)
{
    // The generator yields exception objects for unusable lines: only strings are payloads
    if (!is_string($dataString))
    {
        continue;
    }

    $curl = curl_init();

    if ($curl === false)
    {
        echo 'Could not initialise cURL' . PHP_EOL;
        continue;
    }

    try
    {
        // HTTP request headers
        $headers = [
            'Accept: application/vnd.api+json',
            'Content-Type: application/json',
            // Content-Length is a number of bytes, not of characters
            'Content-Length: ' . strlen($dataString),
            sprintf('X-Joomla-Token: %s', trim($token)),
        ];

        $decodedDataString = json_decode($dataString, true);

        // Article primary key. Usually 'id'. A missing column means "create a new article"
        $pk = (int) ($decodedDataString['id'] ?? 0);

        $output = $process($pk ? 'PATCH' : 'POST', $endpoint($baseUrl, $basePath, $pk), $dataString, $headers, $timeout, $curl);

        $decodedJsonOutput = json_decode($output, true);

        // don't show errors, handle them gracefully
        if (isset($decodedJsonOutput['errors']))
        {
            // If article is potentially a duplicate (already exists with same alias)
            $storage[$dataKey] = [
                'mightExists'       => (int) ($decodedJsonOutput['errors'][0]['code'] ?? 0) === 400,
                'decodedDataString' => $decodedDataString,
            ];

            continue;
        }

        echo $output . PHP_EOL;
    }
    catch (Throwable $e)
    {
        // Report the failure and carry on with the next row
        echo $e->getMessage() . PHP_EOL;
    }
    finally
    {
        curl_close($curl);
    }
}
// Handle errors and retries: every row that was rejected with a 400 (the alias probably exists already)
// is sent once more with a random suffix appended to its alias.
foreach ($storage as $item)
{
    // Other kinds of failures are not retried, so no cURL handle is needed for them
    if (!$item['mightExists'])
    {
        continue;
    }

    $curl = curl_init();

    if ($curl === false)
    {
        echo 'Could not initialise cURL' . PHP_EOL;
        continue;
    }

    try
    {
        $pk = (int) ($item['decodedDataString']['id'] ?? 0);

        // Make the alias unique; when the row had no alias at all the suffix becomes the alias
        $currentAlias = (string) ($item['decodedDataString']['alias'] ?? '');
        $randomSuffix = bin2hex(random_bytes(4));

        $item['decodedDataString']['alias'] = $currentAlias === ''
            ? $randomSuffix
            : sprintf('%s-%s', $currentAlias, $randomSuffix);

        // back to json string after changing alias
        $dataString = json_encode($item['decodedDataString']);

        if ($dataString === false)
        {
            throw new RuntimeException('Could not encode the article for the retry', 422);
        }

        // HTTP request headers
        $headers = [
            'Accept: application/vnd.api+json',
            'Content-Type: application/json',
            // Content-Length is a number of bytes, not of characters
            'Content-Length: ' . strlen($dataString),
            sprintf('X-Joomla-Token: %s', trim($token)),
        ];

        $output = $process($pk ? 'PATCH' : 'POST', $endpoint($baseUrl, $basePath, $pk), $dataString, $headers, $timeout, $curl);

        echo $output . PHP_EOL;
    }
    catch (Throwable $e)
    {
        // Report the failure and carry on with the next item
        echo $e->getMessage() . PHP_EOL;
    }
    finally
    {
        curl_close($curl);
    }
}
0 commit comments