- added earliestPublicationDate for App/Models/Dataset.ts
All checks were successful
CI Pipeline / japa-tests (push) Successful in 49s
All checks were successful
CI Pipeline / japa-tests (push) Successful in 49s
- new classes TokenWorkerService.ts, TokenWorker.ts and ResumptionToken.ts for using Redis to page OAI results - deleted public/assets2/langCodeMap.xml: integrated it directly in datasetxml2oai-pmh.xslt - added redis npm package - added TokenWorkerProvider.ts for using a singleton of TokenWorkerService inside OaiController.ts - added config/oai.ts for OAI-related configs from the .env file - adapted XmlModel.ts for getting the domDocument from the database
This commit is contained in:
parent
2a7480d2ed
commit
7915f66dd6
16 changed files with 691 additions and 89 deletions
|
@ -13,8 +13,15 @@ import { OaiErrorCodes, OaiModelError } from 'App/Exceptions/OaiErrorCodes';
|
|||
import { OaiModelException, BadOaiModelException } from 'App/Exceptions/OaiModelException';
|
||||
import Dataset from 'App/Models/Dataset';
|
||||
import Collection from 'App/Models/Collection';
|
||||
import { getDomain } from 'App/Utils/utility-functions';
|
||||
import { getDomain, preg_match } from 'App/Utils/utility-functions';
|
||||
import XmlModel from 'App/Library/XmlModel';
|
||||
import Logger from '@ioc:Adonis/Core/Logger';
|
||||
import ResumptionToken from 'App/Library/Oai/ResumptionToken';
|
||||
import { ModelQueryBuilderContract } from '@ioc:Adonis/Lucid/Orm';
|
||||
import Config from '@ioc:Adonis/Core/Config';
|
||||
import { inject } from '@adonisjs/fold';
|
||||
// import { TokenWorkerContract } from "MyApp/Models/TokenWorker";
|
||||
import TokenWorkerContract from 'App/Library/Oai/TokenWorker';
|
||||
|
||||
interface XslTParameter {
|
||||
[key: string]: any;
|
||||
|
@ -24,12 +31,19 @@ interface Dictionary {
|
|||
[index: string]: string;
|
||||
}
|
||||
|
||||
/**
 * Mutable paging state that the list handlers fill in and pass between each other
 * while answering a ListIdentifiers / ListRecords request.
 */
interface ListParameter {
    /** Offset of the current page inside the complete result list (0 for the first page). */
    cursor: number;
    /** Size of the complete result list, i.e. the number of ids found for the initial request. */
    totalIds: number;
    /** Start position that is stored in the resumption token for the following page. */
    start: number;
    /** Publish ids that still have to be delivered; the head of this list forms the current page. */
    reldocIds: (number | null)[];
    /** Metadata prefix taken from the request or from the stored resumption token. */
    metadataPrefix: string;
}
|
||||
|
||||
@inject(['App/Library/Oai/TokenWorkerContract'])
|
||||
export default class OaiController {
|
||||
private deliveringDocumentStates = ['published', 'deleted'];
|
||||
// private sampleRegEx = /^[A-Za-zäüÄÜß0-9\-_.!~]+$/;
|
||||
private sampleRegEx = /^[A-Za-zäüÄÜß0-9\-_.!~]+$/;
|
||||
private xsltParameter: XslTParameter;
|
||||
// private configuration: Configuration;
|
||||
// private tokenWorker: TokenWorker;
|
||||
|
||||
/**
|
||||
* Holds xml representation of document information to be processed.
|
||||
|
@ -39,13 +53,9 @@ export default class OaiController {
|
|||
private xml: XMLBuilder;
|
||||
private proc;
|
||||
|
||||
/**
 * @param tokenWorker - worker used by the list handlers to connect to the token store
 *                      and to save / load resumption tokens; exposed as a public property.
 */
constructor(public tokenWorker: TokenWorkerContract) {
    // Load the XSLT file
    this.proc = readFileSync('public/assets2/datasetxml2oai.sef.json');

    // Register the dayjs plugins this controller relies on (`dayjs.tz(...)` for date filters).
    dayjs.extend(utc);
    dayjs.extend(timezone);
}
|
||||
|
@ -66,8 +76,15 @@ export default class OaiController {
|
|||
xsltParameter['oai_error_code'] = 'unknown';
|
||||
xsltParameter['oai_error_message'] = 'Only POST and GET methods are allowed for OAI-PMH.';
|
||||
}
|
||||
|
||||
let earliestDateFromDb;
|
||||
// const oaiRequest: OaiParameter = request.body;
|
||||
try {
|
||||
const firstPublishedDataset: Dataset | null = await Dataset.earliestPublicationDate();
|
||||
firstPublishedDataset != null &&
|
||||
(earliestDateFromDb = firstPublishedDataset.server_date_published.toFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"));
|
||||
this.xsltParameter['earliestDatestamp'] = earliestDateFromDb;
|
||||
// start the request
|
||||
await this.handleRequest(oaiRequest, request);
|
||||
} catch (error) {
|
||||
if (error instanceof OaiModelException) {
|
||||
|
@ -87,7 +104,7 @@ export default class OaiController {
|
|||
|
||||
const xmlString = this.xml.end({ prettyPrint: true });
|
||||
|
||||
let xmlOutput;
|
||||
let xmlOutput; // = xmlString;
|
||||
try {
|
||||
const result = await transform({
|
||||
// stylesheetFileName: `${config.TMP_BASE_DIR}/data-quality/rules/iati.sef.json`,
|
||||
|
@ -123,7 +140,7 @@ export default class OaiController {
|
|||
this.xsltParameter['unixTimestamp'] = now.unix();
|
||||
|
||||
// set OAI base url
|
||||
const baseDomain = process.env.BASE_DOMAIN || 'localhost';
|
||||
const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
|
||||
this.xsltParameter['baseURL'] = baseDomain + '/oai';
|
||||
this.xsltParameter['repURL'] = request.protocol() + '://' + request.hostname();
|
||||
this.xsltParameter['downloadLink'] = request.protocol() + '://' + request.hostname() + '/file/download/';
|
||||
|
@ -139,13 +156,11 @@ export default class OaiController {
|
|||
this.handleListMetadataFormats();
|
||||
} else if (verb == 'GetRecord') {
|
||||
await this.handleGetRecord(oaiRequest);
|
||||
}
|
||||
// else if (verb == "ListRecords") {
|
||||
// await this.handleListRecords(oaiRequest);
|
||||
// } else if (verb == "ListIdentifiers") {
|
||||
// await this.handleListIdentifiers(oaiRequest);
|
||||
// }
|
||||
else if (verb == 'ListSets') {
|
||||
} else if (verb == 'ListRecords') {
|
||||
await this.handleListRecords(oaiRequest);
|
||||
} else if (verb == 'ListIdentifiers') {
|
||||
await this.handleListIdentifiers(oaiRequest);
|
||||
} else if (verb == 'ListSets') {
|
||||
await this.handleListSets();
|
||||
} else {
|
||||
this.handleIllegalVerb();
|
||||
|
@ -197,7 +212,7 @@ export default class OaiController {
|
|||
const sets: { [key: string]: string } = {
|
||||
'open_access': 'Set for open access licenses',
|
||||
'doc-type:ResearchData': 'Set for document type ResearchData',
|
||||
// ...(await this.getSetsForDatasetTypes()),
|
||||
...(await this.getSetsForDatasetTypes()),
|
||||
...(await this.getSetsForCollections()),
|
||||
// ... await this.getSetsForProjects(),
|
||||
} as Dictionary;
|
||||
|
@ -214,7 +229,13 @@ export default class OaiController {
|
|||
this.xsltParameter['repIdentifier'] = repIdentifier;
|
||||
|
||||
const dataId = this.validateAndGetIdentifier(oaiRequest);
|
||||
const dataset = await Dataset.query().where('publish_id', dataId).preload('xmlCache').preload('collections').first();
|
||||
const dataset = await Dataset.query()
|
||||
.where('publish_id', dataId)
|
||||
.preload('xmlCache')
|
||||
.preload('collections', (builder) => {
|
||||
builder.preload('collectionRole');
|
||||
})
|
||||
.first();
|
||||
|
||||
if (!dataset || !dataset.publish_id) {
|
||||
throw new OaiModelException(
|
||||
|
@ -234,6 +255,229 @@ export default class OaiController {
|
|||
await this.createXmlRecord(dataset, datasetNode);
|
||||
}
|
||||
|
||||
/**
 * Handles the `ListIdentifiers` verb: makes sure the token worker is connected and
 * delegates to {@link handleLists} with the configured page size.
 *
 * @param oaiRequest - request arguments as key/value pairs
 */
protected async handleListIdentifiers(oaiRequest: Dictionary): Promise<void> {
    if (!this.tokenWorker.isConnected) {
        await this.tokenWorker.connect();
    }

    // Page size comes from the 'oai.max.listidentifiers' config key; 100 when not configured.
    const maxIdentifier: number = Config.get('oai.max.listidentifiers', 100);
    await this.handleLists(oaiRequest, maxIdentifier);
}
|
||||
|
||||
/**
 * Handles the `ListRecords` verb: makes sure the token worker is connected and
 * delegates to {@link handleLists} with the configured page size.
 *
 * @param oaiRequest - request arguments as key/value pairs
 */
protected async handleListRecords(oaiRequest: Dictionary): Promise<void> {
    if (!this.tokenWorker.isConnected) {
        await this.tokenWorker.connect();
    }

    // Page size comes from the 'oai.max.listrecords' config key; 100 when not configured.
    const maxRecords: number = Config.get('oai.max.listrecords', 100);
    await this.handleLists(oaiRequest, maxRecords);
}
|
||||
|
||||
/**
 * Shared implementation of ListIdentifiers and ListRecords.
 *
 * Determines the ids to deliver (either from a stored resumption token or from a fresh
 * database query), appends one XML record per dataset of the current page to the
 * `Datasets` node and, when ids remain, stores them in a new resumption token.
 *
 * @param oaiRequest - request arguments as key/value pairs
 * @param maxRecords - maximum number of records per page; a falsy value falls back to 100
 * @throws OaiModelException with `NORECORDSMATCH` when the current page is empty
 */
private async handleLists(oaiRequest: Dictionary, maxRecords: number): Promise<void> {
    const pageSize = maxRecords || 100;
    const repIdentifier = 'tethys.at';
    this.xsltParameter['repIdentifier'] = repIdentifier;
    const datasetNode = this.xml.root().ele('Datasets');

    // list initialisation
    const numWrapper: ListParameter = {
        cursor: 0,
        totalIds: 0,
        start: pageSize + 1,
        reldocIds: [],
        metadataPrefix: '',
    };

    if ('resumptionToken' in oaiRequest) {
        // continue a list that was started by an earlier request
        await this.handleResumptionToken(oaiRequest, pageSize, numWrapper);
    } else {
        // fresh request: query the database for all matching ids
        await this.handleNoResumptionToken(oaiRequest, numWrapper);
    }

    // Drop missing ids instead of casting them away, then cut the current page off the front;
    // whatever stays in `restIds` is left for the following pages.
    const restIds = numWrapper.reldocIds.filter((id): id is number => id != null);
    const workIds = restIds.splice(0, pageSize);

    // no records returned
    if (workIds.length == 0) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The combination of the given values results in an empty list.',
            OaiErrorCodes.NORECORDSMATCH,
        );
    }

    const datasets: Dataset[] = await Dataset.query()
        .whereIn('publish_id', workIds)
        .preload('xmlCache')
        .preload('collections', (builder) => {
            builder.preload('collectionRole');
        })
        .orderBy('publish_id');

    // Records are appended one after another so the output keeps the publish_id order.
    for (const dataset of datasets) {
        await this.createXmlRecord(dataset, datasetNode);
    }

    // Store the remaining ids in a resumption token, e.g. with 184 hits and a page size
    // of 100 the token holds 84 ids and start position 101.
    // NOTE(review): on the last page of a resumed list no resumption parameters are set;
    // confirm the stylesheet still emits the empty resumptionToken element OAI-PMH expects.
    if (restIds.length > 0) {
        const token = new ResumptionToken();
        token.startPosition = numWrapper.start;
        token.totalIds = numWrapper.totalIds;
        token.documentIds = restIds;
        token.metadataPrefix = numWrapper.metadataPrefix;

        // the value returned by the token worker is handed to the stylesheet as the resumption token
        const res: string = await this.tokenWorker.set(token);

        // set parameters for the resumptionToken-node
        this.setParamResumption(res, numWrapper.cursor, numWrapper.totalIds);
    }
}
|
||||
|
||||
/**
 * Restores the paging state of a running list request from a stored resumption token.
 *
 * @param oaiRequest - request arguments; `resumptionToken` holds the token key
 * @param maxRecords - page size, used to compute the start position of the next page
 * @param numWrapper - paging state that is overwritten with the values of the token
 * @throws OaiModelException with `BADRESUMPTIONTOKEN` when no token is found for the key
 */
private async handleResumptionToken(oaiRequest: Dictionary, maxRecords: number, numWrapper: ListParameter): Promise<void> {
    const resParam = oaiRequest['resumptionToken']; // e.g. "158886496600000"
    const token = await this.tokenWorker.get(resParam);

    if (!token) {
        throw new OaiModelException(StatusCodes.INTERNAL_SERVER_ERROR, 'cache is outdated.', OaiErrorCodes.BADRESUMPTIONTOKEN);
    }

    // The stored start position is one-based while the cursor is an offset,
    // e.g. start position 101 continues at cursor 100.
    numWrapper.cursor = token.startPosition - 1;
    numWrapper.start = token.startPosition + maxRecords;
    numWrapper.totalIds = token.totalIds;
    numWrapper.reldocIds = token.documentIds;
    numWrapper.metadataPrefix = token.metadataPrefix;

    this.xsltParameter['oai_metadataPrefix'] = numWrapper.metadataPrefix;
}
|
||||
|
||||
/**
 * Handles a list request without resumption token: validates the arguments, queries
 * the database for all matching datasets and stores their publish ids in `numWrapper`.
 *
 * @param oaiRequest - request arguments (`metadataPrefix` is required; `set`, `from`, `until` are optional)
 * @param numWrapper - paging state; `metadataPrefix`, `reldocIds` and `totalIds` are filled in
 * @throws OaiModelException with `BADARGUMENT` for a missing metadataPrefix or invalid dates,
 *         with `NORECORDSMATCH` when the date window cannot contain any record
 */
private async handleNoResumptionToken(oaiRequest: Dictionary, numWrapper: ListParameter): Promise<void> {
    if (!('metadataPrefix' in oaiRequest)) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The prefix of the metadata argument is unknown.',
            OaiErrorCodes.BADARGUMENT,
        );
    }
    numWrapper.metadataPrefix = oaiRequest['metadataPrefix'];
    this.xsltParameter['oai_metadataPrefix'] = numWrapper.metadataPrefix;

    const finder: ModelQueryBuilderContract<typeof Dataset, Dataset> = Dataset.query();
    // add server state restrictions
    finder.whereIn('server_state', this.deliveringDocumentStates);

    if ('set' in oaiRequest) {
        this.applySetRestriction(finder, oaiRequest['set'] as string);
    }
    await this.applyDateRestriction(finder, oaiRequest);

    const reldocIdsDocs = await finder.select('publish_id').orderBy('publish_id');
    numWrapper.reldocIds = reldocIdsDocs.map((dat) => dat.publish_id);
    numWrapper.totalIds = numWrapper.reldocIds.length;
}

/**
 * Narrows the query to the given OAI set (`data-type:<type>`, `open_access` or `ddc:<number>`).
 * Unknown set names and sets without a value leave the query unchanged.
 */
private applySetRestriction(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, set: string): void {
    const setArray = set.split(':');
    // only specs of the exact form "<name>:<value>" carry a usable value
    const setValue = setArray.length == 2 && setArray[1] ? setArray[1] : undefined;

    switch (setArray[0]) {
        case 'data-type':
            if (setValue !== undefined) {
                finder.where('type', setValue);
            }
            break;
        case 'open_access': {
            const openAccessLicences = ['CC-BY-4.0', 'CC-BY-SA-4.0'];
            finder.andWhereHas('licenses', (query) => {
                query.whereIn('name', openAccessLicences);
            });
            break;
        }
        case 'ddc':
            if (setValue !== undefined) {
                finder.andWhereHas('collections', (query) => {
                    query.where('number', setValue);
                });
            }
            break;
    }
}

/**
 * Applies the optional `from` / `until` arguments as a restriction on `server_date_published`.
 * Both values are interpreted in the 'Europe/Vienna' time zone.
 *
 * NOTE(review): a value is widened to the whole day whenever its hour is 0, which also
 * affects full timestamps such as 00:30:00 — confirm that this is intended.
 */
private async applyDateRestriction(
    finder: ModelQueryBuilderContract<typeof Dataset, Dataset>,
    oaiRequest: Dictionary,
): Promise<void> {
    const timeZone = 'Europe/Vienna';
    const dbFormat = 'YYYY-MM-DD HH:mm:ss';
    const hasFrom = 'from' in oaiRequest;
    const hasUntil = 'until' in oaiRequest;

    if (hasFrom && hasUntil) {
        // e.g. &from=2020-09-11&until=2021-05-11
        const from = oaiRequest['from'] as string;
        const until = oaiRequest['until'] as string;
        if (!dayjs(from).isValid() || !dayjs(until).isValid()) {
            throw new OaiModelException(StatusCodes.INTERNAL_SERVER_ERROR, 'Date Parameter is not valid.', OaiErrorCodes.BADARGUMENT);
        }
        let fromDate = dayjs.tz(from, timeZone);
        let untilDate = dayjs.tz(until, timeZone);

        // different string lengths mean one value is a day and the other a full timestamp
        if (from.length != until.length) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'The request has different granularities for the from and until parameters.',
                OaiErrorCodes.BADARGUMENT,
            );
        }
        if (fromDate.hour() == 0) {
            fromDate = fromDate.startOf('day');
        }
        if (untilDate.hour() == 0) {
            untilDate = untilDate.endOf('day');
        }

        finder.whereBetween('server_date_published', [fromDate.format(dbFormat), untilDate.format(dbFormat)]);
    } else if (hasFrom) {
        const from = oaiRequest['from'] as string;
        if (!dayjs(from).isValid()) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'From date parameter is not valid.',
                OaiErrorCodes.BADARGUMENT,
            );
        }
        let fromDate = dayjs.tz(from, timeZone);
        if (fromDate.hour() == 0) {
            fromDate = fromDate.startOf('day');
        }

        if (fromDate.isAfter(dayjs())) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'Given from date is greater than now. The given values results in an empty list.',
                OaiErrorCodes.NORECORDSMATCH,
            );
        }
        finder.andWhere('server_date_published', '>=', fromDate.format(dbFormat));
    } else if (hasUntil) {
        const until = oaiRequest['until'] as string;
        if (!dayjs(until).isValid()) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                'Until date parameter is not valid.',
                OaiErrorCodes.BADARGUMENT,
            );
        }
        let untilDate = dayjs.tz(until, timeZone);
        if (untilDate.hour() == 0) {
            untilDate = untilDate.endOf('day');
        }

        // The lookup yields null when nothing has been published yet. In that case the
        // comparison is skipped; the query simply finds no ids and the caller reports
        // the empty list.
        const firstPublishedDataset: Dataset | null = await Dataset.earliestPublicationDate();
        if (firstPublishedDataset != null && firstPublishedDataset.server_date_published != null) {
            const earliestPublicationDate = dayjs(firstPublishedDataset.server_date_published.toISO());
            if (earliestPublicationDate.isAfter(untilDate)) {
                throw new OaiModelException(
                    StatusCodes.INTERNAL_SERVER_ERROR,
                    `earliestDatestamp is greater than given until date.
                            The given values results in an empty list.`,
                    OaiErrorCodes.NORECORDSMATCH,
                );
            }
        }
        finder.andWhere('server_date_published', '<=', untilDate.format(dbFormat));
    }
}
|
||||
|
||||
/**
 * Hands the values for the resumptionToken node over to the stylesheet parameters.
 *
 * @param res - resumption token value returned by the token worker
 * @param cursor - offset of the current page inside the complete list
 * @param totalIds - size of the complete list
 */
private setParamResumption(res: string, cursor: number, totalIds: number): void {
    // One day from now. 'HH' is the 24-hour clock ('hh' would print 12-hour values), and the
    // time is converted to UTC because the literal 'Z' suffix declares the value as UTC.
    const tomorrow = dayjs().utc().add(1, 'day').format('YYYY-MM-DDTHH:mm:ss[Z]');
    this.xsltParameter['dateDelete'] = tomorrow;
    this.xsltParameter['res'] = res;
    this.xsltParameter['cursor'] = cursor;
    this.xsltParameter['totalIds'] = totalIds;
}
|
||||
|
||||
private validateAndGetIdentifier(oaiRequest: Dictionary): number {
|
||||
// Identifier references metadata Urn, not plain Id!
|
||||
// Currently implemented as 'oai:foo.bar.de:{docId}' or 'urn:nbn...-123'
|
||||
|
@ -283,12 +527,12 @@ export default class OaiController {
|
|||
dataset.publish_id && this.addLandingPageAttribute(domNode, dataset.publish_id.toString());
|
||||
this.addSpecInformation(domNode, 'data-type:' + dataset.type);
|
||||
|
||||
// if (dataset.collections) {
|
||||
// for (const coll of dataset.collections) {
|
||||
// const collRole = await coll.getCollectionRole();
|
||||
// this.addSpecInformation(domNode, collRole.oai_name + ':' + coll.number);
|
||||
// }
|
||||
// }
|
||||
if (dataset.collections) {
|
||||
for (const coll of dataset.collections) {
|
||||
const collRole = coll.collectionRole;
|
||||
this.addSpecInformation(domNode, collRole.oai_name + ':' + coll.number);
|
||||
}
|
||||
}
|
||||
|
||||
datasetNode.import(domNode);
|
||||
}
|
||||
|
@ -315,7 +559,7 @@ export default class OaiController {
|
|||
}
|
||||
|
||||
private addLandingPageAttribute(domNode: XMLBuilder, dataid: string) {
|
||||
const baseDomain = process.env.BASE_DOMAIN || 'localhost';
|
||||
const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
|
||||
const url = 'https://' + getDomain(baseDomain) + '/dataset/' + dataid;
|
||||
// add attribute du dataset xml element
|
||||
domNode.att('landingpage', url);
|
||||
|
@ -368,26 +612,24 @@ export default class OaiController {
|
|||
return sets;
|
||||
}
|
||||
|
||||
// private async getSetsForDatasetTypes(): Promise<IDictionary> {
|
||||
// const sets: { [key: string]: string } = {} as IDictionary;
|
||||
/**
 * Builds the `data-type:<type>` sets from the types of all published datasets.
 *
 * Types that do not pass the `sampleRegEx` check are logged and left out.
 *
 * @returns dictionary mapping each set spec to its description
 */
private async getSetsForDatasetTypes(): Promise<Dictionary> {
    const sets: Dictionary = {};

    const datasets: Array<Dataset> = await Dataset.query().select('type').where('server_state', 'published');

    // Many datasets share a type: look at each distinct type once so that an
    // invalid type is reported a single time instead of once per dataset.
    const datasetTypes = new Set(datasets.map((dataset) => dataset.type));

    for (const datasetType of datasetTypes) {
        if (datasetType && false == preg_match(this.sampleRegEx, datasetType)) {
            const msg = `Invalid SetSpec (data-type='${datasetType}').
                Allowed characters are [${this.sampleRegEx}].`;
            Logger.error(`OAI-PMH: ${msg}`);
            continue;
        }
        const setSpec = 'data-type:' + datasetType;
        sets[setSpec] = `Set for document type '${datasetType}'`;
    }
    return sets;
}
|
||||
|
||||
private handleIllegalVerb() {
|
||||
this.xsltParameter['oai_error_code'] = 'badVerb';
|
||||
|
|
Loading…
Add table
editor.link_modal.header
Reference in a new issue