diff --git a/sys/parsoid.js b/sys/parsoid.js index 932ba8bc0..3870415b0 100644 --- a/sys/parsoid.js +++ b/sys/parsoid.js @@ -120,7 +120,6 @@ class ParsoidService { _initOpts(opts = {}) { this.options = opts; this.parsoidUri = opts.host || opts.parsoidHost; - this.options.stash_ratelimit = opts.stash_ratelimit || 5; delete this.options.parsoidHost; this._blacklist = compileReRenderBlacklist(opts.rerenderBlacklist); if (!this.parsoidUri) { @@ -144,35 +143,6 @@ class ParsoidService { return this.options.disabled_storage[domain] || this.options.disabled_storage.default; } - _checkStashRate(hyper, req) { - if (!hyper.ratelimiter) { - return; - } - if (hyper._rootReq.headers['x-request-class'] !== 'external') { - return; - } - if (!((req.query && req.query.stash) || (req.body && req.body.stash))) { - return; - } - const key = `${hyper.config.service_name}.parsoid_stash|` + - `${hyper._rootReq.headers['x-client-ip']}`; - if (hyper.ratelimiter.isAboveLimit(key, this.options.stash_ratelimit)) { - hyper.logger.log('warn/parsoid/stashlimit', { - key, - rate_limit_per_second: this.options.stash_ratelimit, - message: 'Stashing rate limit exceeded' - }); - throw new HTTPError({ - status: 429, - body: { - type: 'request_rate_exceeded', - title: 'Stashing rate limit exceeded', - rate_limit_per_second: this.options.stash_ratelimit - } - }); - } - } - /** * Assembles the request that is to be used to call the Parsoid service * @@ -203,22 +173,6 @@ class ParsoidService { ]); } - /** - * Gets the URI of a bucket for stashing Parsoid content. Used both for stashing - * original HTML/Data-Parsoid for normal edits as well as for stashing transforms - * - * @param {string} domain the domain name - * @param {string} title the article title - * @param {number} revision the revision of the article - * @param {string} tid the TID of the content - * @return {HyperSwitch.URI} - */ - _getStashBucketURI(domain, title, revision, tid) { - return new URI([ - domain, 'sys', 'key_value', 'parsoidphp-stash', `${title}:${revision}:${tid}` - ]); - } - getFormatAndCheck(format, hyper, req) { return this.getFormat(format, hyper, req) .tap((res) => { @@ -234,50 +188,6 @@ class ParsoidService { }); } - /** - * Get full content from the stash bucket. - * @param {HyperSwitch} hyper the hyper object to route requests - * @param {string} domain the domain name - * @param {string} title the article title - * @param {number} revision the article revision - * @param {string} tid the render TID - * @return {Promise} the promise resolving to full stashed Parsoid - * response or a stashed transform - * @private - */ - _getStashedContent(hyper, domain, title, revision, tid) { - return hyper.get({ - uri: this._getStashBucketURI(domain, title, revision, tid) - }) - .then((res) => { - res = res.body; - res.revid = revision; - return res; - }); - } - - _saveParsoidResultToFallback(hyper, req, parsoidResp) { - const rp = req.params; - const dataParsoidResponse = parsoidResp.body['data-parsoid']; - const htmlResponse = parsoidResp.body.html; - const etag = mwUtil.parseETag(parsoidResp.headers.etag); - return hyper.put({ - uri: this._getStashBucketURI(rp.domain, rp.title, etag.rev, etag.tid), - // Note. The headers we are storing here are for the whole pagebundle response. - // The individual components of the pagebundle contain their own headers that - // which are used to generate actual responses. - headers: { - 'x-store-etag': parsoidResp.headers.etag, - 'content-type': 'application/octet-stream', - 'x-store-content-type': 'application/json' - }, - body: Buffer.from(JSON.stringify({ - 'data-parsoid': dataParsoidResponse, - html: htmlResponse - })) - }); - } - /** * Saves the Parsoid pagebundle result to the latest bucket. * // TODO: Optimization opportunity. We look what's in the @@ -324,61 +234,30 @@ class ParsoidService { })); } - stashTransform(hyper, req, transformPromise) { - // A stash has been requested. We need to store the wikitext sent by - // the client together with the page bundle returned by Parsoid, so it - // can be later reused when transforming back from HTML to wikitext - // cf https://phabricator.wikimedia.org/T114548 - const rp = req.params; - const tid = uuidv1(); - const etag = mwUtil.makeETag(rp.revision, tid, 'stash'); - const wtType = req.original && req.original.headers['content-type'] || 'text/plain'; - return transformPromise.then((original) => hyper.put({ - uri: this._getStashBucketURI(rp.domain, rp.title, rp.revision, tid), - headers: { - 'x-store-etag': etag, - 'content-type': 'application/octet-stream', - 'x-store-content-type': 'application/json' - }, - body: Buffer.from(JSON.stringify({ - 'data-parsoid': original.body['data-parsoid'], - wikitext: { - headers: { 'content-type': wtType }, - body: req.body.wikitext - }, - html: original.body.html - })) - }) - // Add the ETag to the original response so it can be propagated back to the client - .then(() => { - original.body.html.headers.etag = etag; - return original; - })); - } - /** - * Returns the content with fallback to the stash. Revision and TID are optional. + * Returns stored content. Revision and TID are optional. * If only 'title' is provided, only 'latest' bucket is checked. * If 'title' and 'revision' are provided, first the 'latest' bucket is checked. - * Then, the stored revision is compared, if they do not equal, 404 is returned - * as we can not check the stash with no tid provided. - * If all the 'title', 'revision' and 'tid' are provided, - * we check the latest bucket first, and then the stash bucket. + * Then, the stored revision is compared, if they do not equal, 404 is returned. * @param {HyperSwitch} hyper the hyper object to rout requests * @param {string} domain the domain name * @param {string} title the article title * @param {number} [revision] the article revision * @param {string} [tid] the render TID - * @return {Promise} the promise that resolves to full stashed Parsoid response + * @return {Promise} the promise that resolves to full stored Parsoid response * @private */ - _getContentWithFallback(hyper, domain, title, revision, tid) { - if (!revision && !tid) { + _getContentFromStorage(hyper, domain, title, revision, tid) { + if (!revision) { return hyper.get({ uri: this._getLatestBucketURI(domain, title) }); - } else if (!tid) { + } else { return hyper.get({ uri: this._getLatestBucketURI(domain, title) }) .then((res) => { const resEtag = mwUtil.parseETag(res.headers.etag); + if (tid && tid !== resEtag.tid) { + throw new HTTPError({ status: 404 }); + } + if (revision !== resEtag.rev) { throw new HTTPError({ status: 404 }); } @@ -403,6 +282,13 @@ class ParsoidService { } _getPageBundleFromParsoid(hyper, req) { + const headers = {}; + + // If a specific tid is requested, pass this constraint to Parsoid + if (req.params.revision && req.params.tid) { + headers['if-match'] = mwUtil.makeETag(req.params.revision, req.params.tid); + } + const rp = req.params; let path = `page/pagebundle/${encodeURIComponent(rp.title)}`; @@ -412,15 +298,11 @@ class ParsoidService { const parsoidReq = this._getParsoidReq( req, - path - ); + `page/pagebundle/${encodeURIComponent(rp.title)}/${rp.revision}`, + headers + )); - return hyper.get(parsoidReq) - .then((resp) => { - return resp; - }, (error) => { - throw error; - }); + return hyper.get(parsoidReq); } /** @@ -440,8 +322,13 @@ class ParsoidService { }) .then(() => P.join(this._getPageBundleFromParsoid(hyper, req), currentContentRes) .spread((res, currentContentRes) => { - const tid = uuidv1(); + // Use the tid from the etag received from Parsoid if present. + const tid = res.headers.etag ? + mwUtil.parseETag(res.headers.etag).tid : + uuidv1(); + const etag = mwUtil.makeETag(rp.revision, tid); + res.body.html.body = insertTidMeta(res.body.html.body, tid); res.body.html.headers.etag = res.headers.etag = etag; @@ -545,10 +432,8 @@ class ParsoidService { }); } - // check the rate limit for stashing requests - this._checkStashRate(hyper, req); - - let contentReq; + let contentReq = + this._getContentFromStorage(hyper, rp.domain, rp.title, rp.revision, rp.tid); const disabledStorage = this._isStorageDisabled(req.params.domain); @@ -616,10 +501,6 @@ class ParsoidService { }); } } - if (req.query.stash) { - return this._saveParsoidResultToFallback(hyper, req, res) - .thenReturn(res); - } return res; }) .then((res) => { @@ -636,13 +517,7 @@ class ParsoidService { } mwUtil.normalizeContentType(res); - if (req.query.stash) { - // The stash is used by clients that want further support - // for transforming the content. If the content is stored in caches, - // subsequent requests might not even reach RESTBase and the stash - // will expire, thus no-cache. - res.headers['cache-control'] = 'no-cache'; - } else if (this.options.response_cache_control) { + if (this.options.response_cache_control) { res.headers['cache-control'] = this.options.response_cache_control; } @@ -701,29 +576,7 @@ class ParsoidService { // reject with 404. In this case we would just not provide it. contentPromise = P.resolve(undefined); } else { - if (etag && etag.suffix === 'stash' && from === 'html' && to === 'wikitext') { - // T235465: RB should trust its own ETag over the client-supplied revision, but - // allow for the client to be right, so provide their revision as a fall-back - const revMismatch = etag.rev !== rp.revision; - if (revMismatch) { - // the ETag and URI parameter do not agree, log this (for now?) - hyper.logger.log('warn/parsoid/etag_rev', { - msg: 'The revisions in If-Match and URI differ' - }); - } - contentPromise = this._getStashedContent(hyper, rp.domain, - rp.title, etag.rev, etag.tid) - .catch({ status: 404 }, (e) => { - if (!revMismatch) { - // the revisions match, so this is a genuine 404 - throw e; - } - return this._getStashedContent( - hyper, rp.domain, rp.title, rp.revision, tid); - }); - } else { - contentPromise = this._getOriginalContent(hyper, req, rp.revision, tid); - } + contentPromise = this._getOriginalContent(hyper, req, rp.revision, tid); contentPromise = contentPromise .tap((original) => { // Check if parsoid metadata is present as it's required by parsoid. @@ -759,8 +612,7 @@ class ParsoidService { original, [from]: req.body[from], scrub_wikitext: req.body.scrub_wikitext, - body_only: req.body.body_only, - stash: req.body.stash + body_only: req.body.body_only } }; return this.callParsoidTransform(hyper, newReq, from, to); @@ -811,9 +663,6 @@ class ParsoidService { ); const transformPromise = hyper.post(parsoidReq); - if (req.body.stash && from === 'wikitext' && to === 'html') { - return this.stashTransform(hyper, req, transformPromise); - } return transformPromise; } @@ -855,23 +704,6 @@ class ParsoidService { } }); } - // check if we have all the info for stashing - if (req.body.stash) { - if (!rp.title) { - throw new HTTPError({ - status: 400, - body: { - type: 'bad_request', - description: 'Data can be stashed only for a specific title.' - } - }); - } - if (!rp.revision) { - rp.revision = '0'; - } - // check the rate limit for stashing requests - this._checkStashRate(hyper, req); - } let transform; if (rp.revision && rp.revision !== '0') { @@ -936,7 +768,7 @@ class ParsoidService { _getOriginalContent(hyper, req, revision, tid) { const rp = req.params; - return this._getContentWithFallback(hyper, rp.domain, rp.title, revision, tid) + return this._getContentFromStorage(hyper, rp.domain, rp.title, revision, tid) .then((res) => { res = res.body; res.revid = revision; @@ -960,16 +792,6 @@ module.exports = (options = {}) => { body: { valueType: 'blob' } - }, - { - uri: '/{domain}/sys/key_value/parsoidphp-stash', - headers: { - 'content-type': 'application/json' - }, - body: { - valueType: 'blob', - default_time_to_live: options.grace_ttl || 86400 - } } ] }; diff --git a/test/features/pagecontent/pagecontent.js b/test/features/pagecontent/pagecontent.js index 54adf7d71..0047a2f4c 100644 --- a/test/features/pagecontent/pagecontent.js +++ b/test/features/pagecontent/pagecontent.js @@ -96,15 +96,6 @@ describe('item requests', function() { assert.validateListHeader(res.headers.vary, { require: ['Accept'], disallow: [''] }); }); }); - it('should not allow to frontend cache HTML if requested a stash', () => { - return preq.get({ - uri: `${server.config.bucketURL()}/html/${title}?stash=true`, - }) - .then((res) => { - assert.deepEqual(res.status, 200); - assert.deepEqual(res.headers['cache-control'], 'no-cache'); - }); - }); it('should request page lints. no revision', () => { return preq.get({ @@ -126,47 +117,6 @@ describe('item requests', function() { }); }); - let rev2Etag; - it(`should transparently create data-parsoid with id ${prevRevisions[1]}, rev 2`, () => { - return preq.get({ - uri: `${server.config.bucketURL()}/html/${title}/${prevRevisions[1]}?stash=true` - }) - .then((res) => { - assert.deepEqual(res.status, 200); - assert.validateListHeader(res.headers.vary, { require: ['Accept'], disallow: [''] }); - rev2Etag = res.headers.etag.replace(/^"(.*)"$/, '$1'); - }); - }); - - it(`should return data-parsoid just created with revision ${rev2Etag}, rev 2`, () => { - return preq.get({ - uri: `${server.config.bucketURL()}/data-parsoid/${title}/${rev2Etag}` - }) - .then((res) => { - assert.deepEqual(res.status, 200); - assert.contentType(res, contentTypes['data-parsoid']); - }); - }); - - it(`should return HTML and data-parsoid just created by revision ${prevRevisions[2]}`, () => { - return preq.get({ - uri: `${server.config.bucketURL()}/html/${title}/${prevRevisions[2]}?stash=true` - }) - .then((res) => { - assert.deepEqual(res.status, 200); - assert.contentType(res, contentTypes.html); - assert.validateListHeader(res.headers.vary, { require: ['Accept'], disallow: [''] }); - return preq.get({ - uri: `${server.config.bucketURL()}/data-parsoid/${title}/${ - res.headers.etag.replace(/^"(.*)"$/, '$1')}` - }); - }) - .then((res) => { - assert.deepEqual(res.status, 200); - assert.contentType(res, contentTypes['data-parsoid']); - }); - }); - it('should list APIs using the generic listing handler', () => { return preq.get({ uri: `${server.config.hostPort}/${server.config.defaultDomain}/` diff --git a/test/features/pagecontent/rerendering.js b/test/features/pagecontent/rerendering.js index 735594089..200a6e3fc 100644 --- a/test/features/pagecontent/rerendering.js +++ b/test/features/pagecontent/rerendering.js @@ -33,7 +33,7 @@ describe('page re-rendering', function() { let r1etag1; let r1etag2; let r2etag1; - return preq.get({uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${dynamic2}?stash=true`}) + return preq.get({uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${dynamic2}`}) .then(async (res) => { assert.deepEqual(res.status, 200); r1etag1 = res.headers.etag; @@ -49,7 +49,7 @@ describe('page re-rendering', function() { return P.delay(3000) .then(() => { return preq.get({ - uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${dynamic2}?stash=true`, + uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${dynamic2}`, headers: { 'cache-control': 'no-cache' } }); }); @@ -67,7 +67,7 @@ describe('page re-rendering', function() { assert.deepEqual(res.headers.etag, r1etag2); hasTextContentType(res); return preq.get({ - uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${dynamic1}?stash=true`, + uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${dynamic1}`, headers: { 'cache-control': 'no-cache' } }); }) @@ -100,65 +100,4 @@ describe('page re-rendering', function() { }); }); - // A static test page - const static1 = '/html/User:Pchelolo%2fStatic/275852'; - const static2 = '/html/User:Pchelolo%2fStatic/275853'; - - it('should render & re-render independent revisions, but not update unchanged content', function () { - let r1etag1; - let r1etag2; - let r2etag1; - let tid; - return preq.get({uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${static1}?stash=true`}) - .then((res) => { - assert.deepEqual(res.status, 200); - r1etag1 = res.headers.etag; - hasTextContentType(res); - - return preq.get({ - uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${static1}?stash=true`, - headers: { 'cache-control': 'no-cache' } - }); - }) - .then((res) => { - // Since this is a static page which should render the same each - // time, the tid should not change. - r1etag2 = res.headers.etag; - assert.deepEqual(r1etag2, r1etag1); - assert.notDeepEqual(r1etag2, undefined); - hasTextContentType(res); - - return preq.get({uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${static1}`}); - }) - .then((res) => { - assert.deepEqual(res.headers.etag, r1etag1); - hasTextContentType(res); - - return preq.get({uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${static1}`}); - }) - .then((res) => { - assert.deepEqual(res.headers.etag, r1etag2); - hasTextContentType(res); - - return preq.get({uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${static1}`}); - }) - .then((res) => { - assert.deepEqual(res.headers.etag, r1etag2); - hasTextContentType(res); - - return preq.get({uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${static2}?stash=true`}); - }) - .then((res) => { - r2etag1 = res.headers.etag; - assert.deepEqual(res.status, 200); - hasTextContentType(res); - - return preq.get({uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}${static2}`}); - }) - .then((res) => { - assert.deepEqual(res.headers.etag, r2etag1); - hasTextContentType(res); - }); - }); - }); diff --git a/test/features/parsoid/transform.js b/test/features/parsoid/transform.js index d6540f934..40006c09c 100644 --- a/test/features/parsoid/transform.js +++ b/test/features/parsoid/transform.js @@ -170,64 +170,6 @@ describe('transform api', function() { }); }); - it('supports stashing content', () => { - return preq.post({ - uri: `${server.config.baseURL('en.wikipedia.beta.wmflabs.org')}/transform/wikitext/to/html/${testPage.title}/${testPage.revision}`, - body: { - wikitext: '== ABCDEF ==', - stash: true - } - }) - .then((res) => { - assert.deepEqual(res.status, 200); - const etag = res.headers.etag; - assert.deepEqual(/\/stash"$/.test(etag), true); - return preq.post({ - uri: `${server.config.baseURL('en.wikipedia.beta.wmflabs.org')}/transform/html/to/wikitext/${testPage.title}/${testPage.revision}`, - headers: { - 'if-match': etag - }, - body: { - html: res.body.replace('>ABCDEF<', '>FECDBA<') - } - }); - }) - .then((res) => { - assert.deepEqual(res.status, 200); - assert.deepEqual(res.body, '== FECDBA =='); - }); - }); - - it('substitutes 0 as revision if not provided for stashing', () => { - return preq.post({ - uri: `${server.config.baseURL('en.wikipedia.beta.wmflabs.org')}/transform/wikitext/to/html/${testPage.title}`, - body: { - wikitext: '== ABCDEF ==', - stash: true - } - }) - .then((res) => { - assert.deepEqual(res.status, 200); - const etag = res.headers.etag; - assert.deepEqual(/^"0\/[^\/]+\/stash"$/.test(etag), true); - }); - }); - - it('does not allow stashing without title', () => { - return preq.post({ - uri: `${server.config.baseURL('en.wikipedia.beta.wmflabs.org')}/transform/wikitext/to/html`, - body: { - wikitext: '== ABCDEF ==', - stash: true - } - }) - .then(() => { - throw new Error('Error should be thrown'); - }, (e) => { - assert.deepEqual(e.status, 400); - }); - }); - it('does not allow to transform html with no tid', () => { return preq.post({ uri: `${server.config.baseURL('en.wikipedia.beta.wmflabs.org')}/transform/html/to/wikitext/${testPage.title}/${testPage.revision}`, diff --git a/v1/content.yaml b/v1/content.yaml index bd041da1b..f4906af00 100644 --- a/v1/content.yaml +++ b/v1/content.yaml @@ -177,14 +177,6 @@ paths: schema: type: boolean required: false - - name: stash - in: query - description: | - Whether to temporary stash data-parsoid in order to support transforming the - modified content later. If this parameter is set, requests are rate-limited on - a per-client basis (max 5 requests per second per client) - schema: - type: boolean - name: Accept-Language in: header description: | @@ -261,8 +253,6 @@ paths: if-unmodified-since: '{{if-unmodified-since}}' x-restbase-mode: '{{x-restbase-mode}}' x-restbase-parentrevision: '{{x-restbase-parentrevision}}' - query: - stash: '{{stash}}' x-monitor: true x-amples: # FIXME: This depends on our 'no change' detection optimization to @@ -321,14 +311,6 @@ paths: To get a 200 response instead, supply `false` to the `redirect` parameter. schema: type: boolean - - name: stash - in: query - description: | - Whether to temporary stash data-parsoid in order to support transforming the - modified content later. If this parameter is set, requests are rate-limited on - a per-client basis (max 5 requests per second per client) - schema: - type: boolean - name: Accept-Language in: header description: | @@ -417,8 +399,6 @@ paths: if-unmodified-since: '{{if-unmodified-since}}' x-restbase-mode: '{{x-restbase-mode}}' x-restbase-parentrevision: '{{x-restbase-parentrevision}}' - query: - stash: '{{stash}}' x-monitor: false /data-parsoid/{title}/{revision}/{tid}: diff --git a/v1/transform.yaml b/v1/transform.yaml index 9882cb401..7ad256b68 100644 --- a/v1/transform.yaml +++ b/v1/transform.yaml @@ -209,11 +209,10 @@ paths: - Transforms summary: Transform Wikitext to HTML description: | - Transform wikitext to HTML. Note that if you set `stash: true`, you - also need to supply the title. + Transform wikitext to HTML. - Stability: [stable](https://www.mediawiki.org/wiki/API_versioning#Stable) - - Rate limit: 25 req/s (5 req/s when `stash: true`) + - Rate limit: 25 req/s requestBody: content: multipart/form-data: @@ -228,9 +227,6 @@ paths: body_only: type: boolean description: Return only `body.innerHTML` - stash: - type: boolean - description: Whether to temporarily stash the result of the transformation required: true responses: 200: @@ -286,7 +282,6 @@ paths: body: wikitext: '{{wikitext}}' body_only: '{{body_only}}' - stash: '{{stash}}' x-monitor: false /wikitext/to/html/{title}: @@ -313,9 +308,6 @@ paths: body_only: type: boolean description: Return only `body.innerHTML` - stash: - type: boolean - description: Whether to temporarily stash the result of the transformation required: true x-request-handler: - get_from_backend: @@ -324,7 +316,6 @@ paths: body: wikitext: '{{wikitext}}' body_only: '{{body_only}}' - stash: '{{stash}}' x-monitor: true x-amples: - title: Transform wikitext to html @@ -371,9 +362,6 @@ paths: body_only: type: boolean description: Return only `body.innerHTML` - stash: - type: boolean - description: Whether to temporarily stash the result of the transformation required: true x-request-handler: - get_from_backend: @@ -382,7 +370,6 @@ paths: body: wikitext: '{{wikitext}}' body_only: '{{body_only}}' - stash: '{{stash}}' /wikitext/to/lint: post: &wikitext_to_lint_post_spec