Skip to content

Commit

Permalink
Disable storage for parsoid requests (#1330)
Browse files Browse the repository at this point in the history
* Allow storage to be disabled by domain.
* Fix default options template
* Add x-restbase-cache headers

Several endpoints already have a way to disable storage. Use the same
mechanism for parsoid. There is no need to use a regular expression to
match domains, because the config can be defined for each domain
separately.
  • Loading branch information
brightbyte committed Oct 11, 2023
1 parent eac1d08 commit 5a436df
Show file tree
Hide file tree
Showing 7 changed files with 173 additions and 65 deletions.
20 changes: 11 additions & 9 deletions config.frontend.test.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# Hacky way to parametrize RESTBase tests. TODO: Move to config?
test:
parsoid: &parsoid_test_options
disabled_storage: false
host: http://parsoid-external-ci-access.beta.wmflabs.org/w/rest.php
content_types:
html: '/^text\/html; charset=utf-8; profile="https:\/\/www\.mediawiki\.org\/wiki\/Specs\/HTML\/[\d.]+"$/'
data-parsoid: '/^application\/json; charset=utf-8; profile="https:\/\/www\.mediawiki\.org\/wiki\/Specs\/data-parsoid/[\d.]+"$/'
wikitext: '/^text\/plain; charset=utf-8; profile="https:\/\/www.mediawiki.org\/wiki\/Specs\/wikitext\/[\d.]+"$/'

default_project: &default_project
x-modules:
- spec:
Expand All @@ -8,7 +18,7 @@ default_project: &default_project
- path: projects/v1/default.wmf.yaml
options: &default_options
parsoid:
host: http://parsoid-external-ci-access.beta.wmflabs.org/w/rest.php
<<: *parsoid_test_options
grace_ttl: 1000000
action:
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"
Expand Down Expand Up @@ -143,14 +153,6 @@ testing_project: &testing_project
transcludes_stream: change-prop.transcludes.resource-change
/{api:sys}: *default_sys

# Hacky way to parametrize RESTBase tests. TODO: Move to config?
test:
content_types:
html: '/^text\/html; charset=utf-8; profile="https:\/\/www\.mediawiki\.org\/wiki\/Specs\/HTML\/[\d.]+"$/'
data-parsoid: '/^application\/json; charset=utf-8; profile="https:\/\/www\.mediawiki\.org\/wiki\/Specs\/data-parsoid/[\d.]+"$/'
wikitext: '/^text\/plain; charset=utf-8; profile="https:\/\/www.mediawiki.org\/wiki\/Specs\/wikitext\/[\d.]+"$/'


# The root of the spec tree. Domains tend to share specs by referencing them
# using YAML references.
spec_root: &spec_root
Expand Down
20 changes: 11 additions & 9 deletions config.fullstack.test.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# Hacky way to parametrize RESTBase tests. TODO: Move to config?
test:
parsoid: &parsoid_test_options
disabled_storage: false
host: http://parsoid-external-ci-access.beta.wmflabs.org/w/rest.php
content_types:
html: '/^text\/html; charset=utf-8; profile="https:\/\/www\.mediawiki\.org\/wiki\/Specs\/HTML\/[\d.]+"$/'
data-parsoid: '/^application\/json; charset=utf-8; profile="https:\/\/www\.mediawiki\.org\/wiki\/Specs\/data-parsoid/[\d.]+"$/'
wikitext: '/^text\/plain; charset=utf-8; profile="https:\/\/www.mediawiki.org\/wiki\/Specs\/wikitext\/[\d.]+"$/'

default_project: &default_project
x-modules:
- spec:
Expand All @@ -7,7 +17,7 @@ default_project: &default_project
- path: projects/v1/default.wmf.yaml
options: &default_options
parsoid:
host: http://parsoid-external-ci-access.beta.wmflabs.org/w/rest.php
<<: *parsoid_test_options
grace_ttl: 1000000
action:
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"
Expand Down Expand Up @@ -198,14 +208,6 @@ testing_project: &testing_project
options: *proxy_options
/{api:sys}: *default_sys

# Hacky way to parametrize RESTBase tests. TODO: Move to config?
test:
content_types:
html: '/^text\/html; charset=utf-8; profile="https:\/\/www\.mediawiki\.org\/wiki\/Specs\/HTML\/[\d.]+"$/'
data-parsoid: '/^application\/json; charset=utf-8; profile="https:\/\/www\.mediawiki\.org\/wiki\/Specs\/data-parsoid/[\d.]+"$/'
wikitext: '/^text\/plain; charset=utf-8; profile="https:\/\/www.mediawiki.org\/wiki\/Specs\/wikitext\/[\d.]+"$/'


# The root of the spec tree. Domains tend to share specs by referencing them
# using YAML references.
spec_root: &spec_root
Expand Down
1 change: 1 addition & 0 deletions projects/sys/default.wmf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ paths:
- path: sys/parsoid.js
options:
host: '{{options.parsoid.host}}'
disabled_storage: '{{options.parsoid.disabled_storage}}'
response_cache_control: '{{options.purged_cache_control}}'
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
# A list of pages that we don't want to re-render on each edit.
Expand Down
132 changes: 97 additions & 35 deletions sys/parsoid.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,18 @@ class ParsoidService {
}
}

_isStorageDisabled(domain) {
if (!this.options.disabled_storage) {
return false;
}

if (this.options.disabled_storage === true) {
return true;
}

return this.options.disabled_storage[domain] || this.options.disabled_storage.default;
}

_checkStashRate(hyper, req) {
if (!hyper.ratelimiter) {
return;
Expand Down Expand Up @@ -383,6 +395,7 @@ class ParsoidService {
if (revision !== resEtag.rev || tid !== resEtag.tid) {
throw new HTTPError({ status: 404 });
}
res.headers['x-restbase-cache'] = 'miss';
return res;
})
);
Expand All @@ -391,10 +404,23 @@ class ParsoidService {

_getPageBundleFromParsoid(hyper, req) {
const rp = req.params;
return hyper.get(this._getParsoidReq(
let path = `page/pagebundle/${encodeURIComponent(rp.title)}`;

if (rp.revision) {
path += `/${rp.revision}`;
}

const parsoidReq = this._getParsoidReq(
req,
`page/pagebundle/${encodeURIComponent(rp.title)}/${rp.revision}`
));
path
);

return hyper.get(parsoidReq)
.then((resp) => {
return resp;
}, (error) => {
throw error;
});
}

/**
Expand Down Expand Up @@ -495,7 +521,10 @@ class ParsoidService {
const rp = req.params;
const generateContent = (storageRes) => {
if (!rp.tid && (storageRes.status === 404 || storageRes.status === 200)) {
return this.generateAndSave(hyper, req, storageRes);
return this.generateAndSave(hyper, req, storageRes).then((res) => {
res.headers['x-restbase-cache'] = 'miss';
return res;
});
} else {
// Don't generate content if there's some other error.
throw storageRes;
Expand All @@ -519,47 +548,73 @@ class ParsoidService {
// check the rate limit for stashing requests
this._checkStashRate(hyper, req);

let contentReq =
this._getContentWithFallback(hyper, rp.domain, rp.title, rp.revision, rp.tid);
let contentReq;

if (mwUtil.isNoCacheRequest(req)) {
// Check content generation either way
contentReq = contentReq.then((res) => {
if (isModifiedSince(req, res)) { // Already up to date, nothing to do.
throw new HTTPError({
status: 412,
body: {
type: 'precondition_failed',
detail: 'The precondition failed'
}
});
}
return generateContent(res);
}, generateContent);
const disabledStorage = this._isStorageDisabled(req.params.domain);

if (!disabledStorage) {
contentReq = this._getContentWithFallback(
hyper, rp.domain, rp.title, rp.revision, rp.tid
).then((res) => {
res.headers['x-restbase-cache'] = res.headers['x-restbase-cache'] || 'hit'; // FIXME: miss?
return res;
});

if (mwUtil.isNoCacheRequest(req)) {

// Check content generation either way
contentReq = contentReq.then((res) => {
res.headers['x-restbase-cache'] = 'nocache';

if (isModifiedSince(req, res)) { // Already up to date, nothing to do.
throw new HTTPError({
status: 412,
body: {
type: 'precondition_failed',
detail: 'The precondition failed'
}
});
}
return generateContent(res);
}, generateContent);
} else {
// Only (possibly) generate content if there was an error
contentReq = contentReq.catch(generateContent);
}
} else {
// Only (possibly) generate content if there was an error
contentReq = contentReq.catch(generateContent);
contentReq = this._getPageBundleFromParsoid(hyper, req);
}

return contentReq
.then((res) => {
res.headers = res.headers || {};
if (!res.headers.etag) {
res.headers.etag = res.body.html.headers && res.body.html.headers.etag;
}
if (!res.headers.etag || /^null$/.test(res.headers.etag)) {
// if there is no ETag, we *could* create one here, but this
// would mean at least cache pollution, and would hide the
// fact that we have incomplete data in storage, so error out
hyper.logger.log('error/parsoid/response_etag_missing', {
msg: 'Detected a null etag in the response!'
});
throw new HTTPError({
status: 500,
body: {
title: 'no_etag',
description: 'No ETag has been provided in the response'
}
});
if (disabledStorage) {
// Generate an ETag, for consistency
const tid = uuidv1();
const revid = res.headers['content-revision-id'] || '0';
const etag = mwUtil.makeETag(revid, tid);
res.body.html.body = insertTidMeta(res.body.html.body, tid);
res.body.html.headers.etag = res.headers.etag = etag;
res.headers['x-restbase-cache'] = 'disabled';
} else {
// if there is no ETag, we *could* create one here, but this
// would mean at least cache pollution, and would hide the
// fact that we have incomplete data in storage, so error out
hyper.logger.log('error/parsoid/response_etag_missing', {
msg: 'Detected a null etag in the response!'
});
throw new HTTPError({
status: 500,
body: {
title: 'no_etag',
description: 'No ETag has been provided in the response'
}
});
}
}
if (req.query.stash) {
return this._saveParsoidResultToFallback(hyper, req, res)
Expand All @@ -569,10 +624,17 @@ class ParsoidService {
})
.then((res) => {
const etag = res.headers.etag;
const cacheInfo = res.headers['x-restbase-cache'];

// Chop off the correct format to return.
res = Object.assign({ status: res.status }, res.body[format]);
res.headers = res.headers || {};
res.headers.etag = etag;

if (cacheInfo) {
res.headers['x-restbase-cache'] = cacheInfo;
}

mwUtil.normalizeContentType(res);
if (req.query.stash) {
// The stash is used by clients that want further support
Expand Down
2 changes: 1 addition & 1 deletion test/features/pagecontent/language_variants.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ describe('Language variants', function() {
assert.deepEqual(res.headers['content-language'], 'de');
assert.deepEqual(res.headers['x-restbase-sunset'] || null, 'true');
assert.checkString(res.headers.etag, /^"\d+\/[a-f0-9-]+"$/);
assert.deepEqual('Das ist eine testseite', res.body.extract);
assert.deepEqual(res.body.extract, 'Das ist eine testseite');
})
});

Expand Down
38 changes: 31 additions & 7 deletions test/features/pagecontent/pagecontent.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ describe('item requests', function() {
this.timeout(20000);
let pagingToken = '';
let contentTypes;
let disabledStorage;
const title = 'Earth';
const revision = '358126';
const prevRevisions = ['358125', '214592', '214591']
Expand All @@ -17,13 +18,14 @@ describe('item requests', function() {
before(() => server.start()
.then(() => {
contentTypes = server.config.conf.test.content_types;
disabledStorage = server.config.conf.test.parsoid.disabled_storage;
}));
after(() => server.stop());

const deniedTitle = 'User:Pchelolo/Restricted Revision';
const deniedRev = '409440';

function contentURI(format) {
function deniedContentUri(format) {
return [server.config.bucketURL(), format, encodeURIComponent(deniedTitle), deniedRev].join('/');
}
const assertCORS = (res) => {
Expand Down Expand Up @@ -55,22 +57,44 @@ describe('item requests', function() {
});
});

it('should transparently create a new HTML revision for Main_Page', () => {
it('should transparently create a new HTML revision for Main_Page', function () {
if ( disabledStorage ) {
this.skip();
}

return preq.get({
uri: `${server.config.bucketURL()}/html/Main_Page`,
})
.then((res) => {
assert.deepEqual(res.status, 200);
assert.validateListHeader(res.headers.vary, { require: ['Accept'], disallow: [''] });

// NOTE: We have to accept "hit" here, because the test setup has a persistent cache.
assert.deepEqual(res.headers['x-restbase-cache'], 'hit|miss');

return preq.get({
uri: `${server.config.bucketURL()}/html/Main_Page`
});
})
.then((res) => {
assert.deepEqual(res.status, 200);
assert.deepEqual(res.headers['x-restbase-cache'], 'hit');
assert.validateListHeader(res.headers.vary, { require: ['Accept'], disallow: [''] });
});
});
it('should not cache HTML for Main_Page when storage is disabled', function () {
if ( !disabledStorage ) {
this.skip();
}

return preq.get({
uri: `${server.config.bucketURL()}/html/Main_Page`,
})
.then((res) => {
assert.deepEqual(res.status, 200);
assert.deepEqual(res.headers['x-restbase-cache'], 'disabled');
})
});
it(`should transparently create a new HTML revision with id ${prevRevisions[0]}`, () => {
return preq.get({
uri: `${server.config.bucketURL()}/html/${title}/${prevRevisions[0]}`,
Expand Down Expand Up @@ -252,19 +276,19 @@ describe('item requests', function() {
});

it('should deny access to the HTML of a restricted revision', () => {
return preq.get({ uri: contentURI('html') }).then((res) => {
throw new Error(`Expected status 403, but gotten ${res.status}`);
return preq.get({ uri: deniedContentUri('html') }).then((res) => {
assert.fail(`Expected status 403, but gotten ${res.status}`);
}, (res) => {
assert.deepEqual(res.status, 403);
});
});

it('should deny access to the same HTML even after re-fetching it', () => {
it('should deny access to restricted revision even after re-fetching it', () => {
return preq.get({
uri: contentURI('html'),
uri: deniedContentUri('html'),
headers: { 'cache-control': 'no-cache' }
}).then((res) => {
throw new Error(`Expected status 403, but gotten ${res.status}`);
assert.fail(`Expected status 403, but gotten ${res.status}`);
}, (res) => {
assert.deepEqual(res.status, 403);
});
Expand Down
Loading

0 comments on commit 5a436df

Please sign in to comment.