All checks were successful
CI / container-job (push) Successful in 49s
- Modified Api/Authors.Controller.ts to use only personal types and sort by dataset_count. - Completely rewritten AvatarController.ts. - Added new Api/CollectionsController.ts for querying collections and collection_roles. - Modified Api/DatasetController.ts to preload titles, identifier and order by server_date_published. - Modified FileController.ts to serve files from /storage/app/data/ instead of /storage/app/public. - Added new Api/UserController for requesting submitters (getSubmitters). - Improved OaiController.ts with performant DB queries for better ResumptionToken handling. - Modified Submitter/DatasetController.ts by adding a categorize method for library classification. - Rewritten ResumptionToken.ts. - Improved TokenWorkerService.ts to utilize browser fingerprint. - Edited dataset.ts by adding the doiIdentifier property. - Enhanced person.ts to improve the fullName property. - Completely rewritten AsideMenuItem.vue component. - Updated CarBoxClient.vue to use TypeScript. - Added new CardBoxDataset.vue for displaying recent datasets on the dashboard. - Completely rewritten TableSampleClients.vue for the dashboard. - Completely rewritten UserAvatar.vue. - Made small layout changes in Dashboard.vue. - Added new Category.vue for browsing scientific collections. - Adapted the pinia store in main.ts. - Added additional routes in start/routes.ts and start/api/routes.ts. - Improved referenceValidation.ts for better ISBN existence checking. - NPM dependency updates.
727 lines
30 KiB
TypeScript
727 lines
30 KiB
TypeScript
import type { HttpContext } from '@adonisjs/core/http';
|
||
// import { RequestContract } from '@ioc:Adonis/Core/Request';
|
||
import { Request } from '@adonisjs/core/http';
|
||
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
|
||
import { create } from 'xmlbuilder2';
|
||
import dayjs, { Dayjs } from 'dayjs';
|
||
import utc from 'dayjs/plugin/utc.js';
|
||
import timezone from 'dayjs/plugin/timezone.js';
|
||
import { readFileSync } from 'fs';
|
||
import { StatusCodes } from 'http-status-codes';
|
||
import SaxonJS from 'saxon-js';
|
||
// import { Xslt, xmlParse } from 'xslt-processor'
|
||
import { OaiErrorCodes, OaiModelError } from '#app/exceptions/OaiErrorCodes';
|
||
import { OaiModelException, BadOaiModelException } from '#app/exceptions/OaiModelException';
|
||
import Dataset from '#models/dataset';
|
||
import Collection from '#models/collection';
|
||
import { getDomain, preg_match } from '#app/utils/utility-functions';
|
||
import XmlModel from '#app/Library/XmlModel';
|
||
import logger from '@adonisjs/core/services/logger';
|
||
import ResumptionToken from '#app/Library/Oai/ResumptionToken';
|
||
// import Config from '@ioc:Adonis/Core/Config';
|
||
import config from '@adonisjs/core/services/config';
|
||
// import { inject } from '@adonisjs/fold';
|
||
import { inject } from '@adonisjs/core';
|
||
// import { TokenWorkerContract } from "MyApp/Models/TokenWorker";
|
||
import TokenWorkerContract from '#library/Oai/TokenWorkerContract';
|
||
import { ModelQueryBuilderContract } from '@adonisjs/lucid/types/model';
|
||
|
||
/**
 * Bag of named values handed to the XSLT stylesheet as `stylesheetParams`.
 * Values are strings, numbers or undefined depending on the parameter.
 */
interface XslTParameter {
    [paramName: string]: any;
}
|
||
|
||
/**
 * Plain string-to-string map; used for the OAI request arguments and for
 * the setSpec -> setName tables.
 */
interface Dictionary {
    [key: string]: string;
}
|
||
|
||
/**
 * Mutable paging state shared by the ListRecords / ListIdentifiers helpers.
 */
interface PagingParameter {
    /** Value reported to the stylesheet as `cursor` of the current page. */
    cursor: number;
    /** Total number of datasets matching the list request. */
    totalLength: number;
    /** Start position stored in the resumption token for the following page. */
    start: number;
    /** publish_ids of the page after the current one (stored in the resumption token). */
    nextDocIds: number[];
    /** publish_ids delivered in the current response. */
    activeWorkIds: number[];
    /** Requested metadata format, carried over between pages. */
    metadataPrefix: string;
    /**
     * Arguments of the original list request (plus `deliveringStates`), kept so
     * that the same query can be rebuilt when a resumption token is used.
     * Typed as a record instead of the boxed `Object` wrapper type.
     */
    queryParams: Record<string, unknown>;
}
|
||
|
||
@inject()
|
||
export default class OaiController {
|
||
/** Server states whose datasets are delivered through the OAI interface. */
private deliveringDocumentStates = ['published', 'deleted'];

/** Pattern a dataset type is checked against before it is offered as a `data-type:` set. */
private sampleRegEx = /^[A-Za-zäüÄÜß0-9\-_.!~]+$/;

/** Parameters passed to the stylesheet; replaced by a fresh object at the start of `index`. */
private xsltParameter: XslTParameter;

/** Result of `Dataset.earliestPublicationDate()`; assigned in `index`, read by the until-filter. */
private firstPublishedDataset: Dataset | null;

/**
 * Intermediate XML document. It is created in `index`, filled by the verb
 * handlers and finally serialized as input of the XSLT transformation.
 */
private xml: XMLBuilder;

/** Content of the compiled stylesheet file, read in the constructor. */
private proc;
|
||
|
||
/**
 * Loads the compiled stylesheet and registers the dayjs plugins used by
 * the date handling of this controller.
 *
 * @param tokenWorker - service used to store and look up resumption tokens.
 */
constructor(public tokenWorker: TokenWorkerContract) {
    const stylesheetPath = 'public/assets2/datasetxml2oai.sef.json';
    this.proc = readFileSync(stylesheetPath);

    // `utc` and `timezone` are required for the `dayjs.tz(...)` call used when parsing from/until.
    for (const plugin of [utc, timezone]) {
        dayjs.extend(plugin);
    }
}
|
||
|
||
/**
 * HTTP entry point for every OAI-PMH request.
 *
 * Reads the request arguments (query string for GET, body for POST), lets
 * `handleRequest` fill the intermediate XML document and the stylesheet
 * parameters, runs the SaxonJS transformation and sends the result as XML.
 * Errors raised while handling the verb are not propagated: they are turned
 * into the `oai_error_code` / `oai_error_message` stylesheet parameters.
 *
 * @param ctx - HTTP context; only `request` and `response` are used.
 * @returns Nothing; the answer is written to `response`. A failing
 *          transformation is answered with HTTP 500 and a JSON body.
 */
public async index({ response, request }: HttpContext): Promise<void> {
    this.xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');

    // The local constant and the instance field refer to the same object.
    const xsltParameter: XslTParameter = (this.xsltParameter = {});

    let oaiRequest: Dictionary = {};
    const httpMethod = request.method();
    if (httpMethod === 'POST') {
        oaiRequest = request.body();
    } else if (httpMethod === 'GET') {
        oaiRequest = request.qs();
    } else {
        // NOTE(review): handleRequest below is still called with an empty request; it then
        // throws BADVERB and the catch block overwrites these two values.
        xsltParameter['oai_error_code'] = 'unknown';
        xsltParameter['oai_error_message'] = 'Only POST and GET methods are allowed for OAI-PMH.';
    }

    try {
        this.firstPublishedDataset = await Dataset.earliestPublicationDate();

        let earliestDateFromDb: string | undefined;
        if (this.firstPublishedDataset != null) {
            earliestDateFromDb = this.firstPublishedDataset.server_date_published.toFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        }
        this.xsltParameter['earliestDatestamp'] = earliestDateFromDb;

        // start the request
        await this.handleRequest(oaiRequest, request);
    } catch (error) {
        if (error instanceof OaiModelException) {
            // Map the internal code to the protocol error code; fall back to a descriptive text.
            const oaiErrorCode = OaiModelError.get(error.oaiCode) ?? 'Unknown oai error code ' + error.oaiCode;
            this.xsltParameter['oai_error_code'] = oaiErrorCode;
            this.xsltParameter['oai_error_message'] = error.message;
        } else {
            // Unexpected failures were previously swallowed silently; keep the generic
            // answer for the client but leave a trace in the log.
            const detail = error instanceof Error ? (error.stack ?? error.message) : String(error);
            logger.error(`OAI-PMH: unexpected error while handling request: ${detail}`);
            this.xsltParameter['oai_error_code'] = 'unknown';
            this.xsltParameter['oai_error_message'] = 'An internal error occured.';
        }
    }

    const xmlString = this.xml.end({ prettyPrint: true });

    let xmlOutput;
    try {
        const result = await SaxonJS.transform({
            stylesheetText: this.proc,
            destination: 'serialized',
            sourceText: xmlString,
            stylesheetParams: xsltParameter,
        });
        xmlOutput = result.principalResult;
    } catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        logger.error(`OAI-PMH: XSLT transformation failed: ${detail}`);
        return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
            // The previous text ("creating the user") was copied from an unrelated controller.
            message: 'An error occurred while transforming the OAI-PMH response',
            error: detail,
        });
    }

    response
        .header('Content-Type', 'application/xml; charset=utf-8')
        .header('Access-Control-Allow-Origin', '*')
        .header('Access-Control-Allow-Methods', 'GET,POST');
    response.status(StatusCodes.OK).send(xmlOutput);
}
|
||
|
||
/**
 * Sets the stylesheet parameters common to all verbs and dispatches to the
 * handler of the requested verb.
 *
 * The verb is compared strictly: the former mix of `==` and `===` allowed a
 * non-string value (e.g. a one-element array) to match some verbs but not others.
 *
 * @param oaiRequest - arguments of the OAI request.
 * @param request - HTTP request, used for protocol, host name and the fingerprint.
 * @throws OaiModelException (BADVERB) when no verb argument is present.
 */
protected async handleRequest(oaiRequest: Dictionary, request: Request) {
    // Set response time
    const now: Dayjs = dayjs();
    this.xsltParameter['responseDate'] = now.format('YYYY-MM-DDTHH:mm:ss[Z]');
    this.xsltParameter['unixTimestamp'] = now.unix();

    // set OAI base url
    const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
    const repositoryUrl = request.protocol() + '://' + request.hostname();
    this.xsltParameter['baseURL'] = baseDomain + '/oai';
    this.xsltParameter['repURL'] = repositoryUrl;
    this.xsltParameter['downloadLink'] = repositoryUrl + '/file/download/';
    this.xsltParameter['doiLink'] = 'https://doi.org/';
    this.xsltParameter['doiPrefix'] = 'info:eu-repo/semantics/altIdentifier/doi/';

    const verb = oaiRequest['verb'];
    if (!verb) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The verb provided in the request is illegal.',
            OaiErrorCodes.BADVERB,
        );
    }

    this.xsltParameter['oai_verb'] = verb;
    switch (verb) {
        case 'Identify':
            this.handleIdentify();
            break;
        case 'ListMetadataFormats':
            this.handleListMetadataFormats();
            break;
        case 'GetRecord':
            await this.handleGetRecord(oaiRequest);
            break;
        case 'ListRecords':
            // The fingerprint is handed to the token worker when a resumption token is stored.
            await this.handleListRecords(oaiRequest, this.getBrowserFingerprint(request));
            break;
        case 'ListIdentifiers':
            await this.handleListIdentifiers(oaiRequest, this.getBrowserFingerprint(request));
            break;
        case 'ListSets':
            await this.handleListSets();
            break;
        default:
            this.handleIllegalVerb();
    }
}
|
||
|
||
/**
 * Handles the `Identify` verb: publishes the repository description as
 * stylesheet parameters and adds an empty `Datasets` element to the document.
 * Each value comes from the environment, with a built-in default.
 */
protected handleIdentify() {
    const repIdentifier = process.env.OAI_REP_IDENTIFIER ?? 'tethys.at';

    // set parameters for oai-pmh.xslt
    Object.assign(this.xsltParameter, {
        email: process.env.OAI_EMAIL ?? 'repository@geosphere.at',
        repositoryName: process.env.OAI_REPOSITORY_NAME ?? 'Tethys RDR',
        repIdentifier,
        sampleIdentifier: `oai:${repIdentifier}:1`,
    });

    this.xml.root().ele('Datasets');
}
|
||
|
||
/**
 * Handles the `ListMetadataFormats` verb. Only an empty `Datasets` element
 * is added; no further stylesheet parameters are set here.
 */
protected handleListMetadataFormats() {
    const rootNode = this.xml.root();
    rootNode.ele('Datasets');
}
|
||
|
||
/**
 * Handles the `ListSets` verb: writes one `Rdr_Sets` element (attributes
 * `Type` and `TypeName`) per known set below the `Datasets` element.
 *
 * The repository identifier is read from `OAI_REP_IDENTIFIER`, as in
 * `handleIdentify`, instead of being hard-coded; the default is unchanged.
 */
protected async handleListSets() {
    this.xsltParameter['repIdentifier'] = process.env.OAI_REP_IDENTIFIER ?? 'tethys.at';
    const datasetElement = this.xml.root().ele('Datasets');

    // Later entries win on duplicate keys: static sets, then dataset types, then collections.
    const sets: Dictionary = {
        'open_access': 'Set for open access licenses',
        'openaire_data': 'OpenAIRE',
        'doc-type:ResearchData': 'Set for document type ResearchData',
        ...(await this.getSetsForDatasetTypes()),
        ...(await this.getSetsForCollections()),
    };

    for (const [setSpec, setName] of Object.entries(sets)) {
        const setElement = datasetElement.ele('Rdr_Sets');
        setElement.att('Type', setSpec);
        setElement.att('TypeName', setName);
    }
}
|
||
|
||
/**
 * Handles the `GetRecord` verb: resolves the identifier to a dataset and
 * adds its XML record below the `Datasets` element.
 *
 * The repository identifier is read from `OAI_REP_IDENTIFIER`, as in
 * `handleIdentify`, instead of being hard-coded; the default is unchanged.
 *
 * @param oaiRequest - request arguments; `identifier` and `metadataPrefix` are required.
 * @throws BadOaiModelException when `identifier` is missing.
 * @throws OaiModelException IDDOESNOTEXIST when no dataset matches; BADARGUMENT
 *         when `metadataPrefix` is missing; NORECORDSMATCH when the dataset is
 *         not in a deliverable state.
 */
protected async handleGetRecord(oaiRequest: Dictionary) {
    this.xsltParameter['repIdentifier'] = process.env.OAI_REP_IDENTIFIER ?? 'tethys.at';

    // validateAndGetIdentifier also rejects a missing identifier with the same
    // exception, so the separate presence check that used to precede it is not repeated.
    const dataId = this.validateAndGetIdentifier(oaiRequest);

    // Retrieve dataset with associated XML cache and collection roles
    const dataset = await Dataset.query()
        .where('publish_id', dataId)
        .preload('xmlCache')
        .preload('collections', (builder) => {
            builder.preload('collectionRole');
        })
        .first();

    if (!dataset || !dataset.publish_id) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The value of the identifier argument is unknown or illegal in this repository.',
            OaiErrorCodes.IDDOESNOTEXIST,
        );
    }

    // Validate and set the metadata prefix parameter
    this.xsltParameter['oai_metadataPrefix'] = this.validateAndGetMetadataPrefix(oaiRequest);

    // Ensure that the dataset is in an exportable state
    this.validateDatasetState(dataset);

    // Build the XML for the dataset record and add it to the root node
    const datasetNode = this.xml.root().ele('Datasets');
    await this.createXmlRecord(dataset, datasetNode);
}
|
||
|
||
/**
 * Handles the `ListIdentifiers` verb by delegating to `handleLists` with the
 * page size configured under `oai.max.listidentifiers` (default 100).
 *
 * @param oaiRequest - arguments of the OAI request.
 * @param browserFingerprint - client fingerprint forwarded to the token worker.
 */
protected async handleListIdentifiers(oaiRequest: Dictionary, browserFingerprint: string) {
    // The token worker is needed for resumption tokens; connect on first use.
    const workerReady = this.tokenWorker.isConnected;
    if (!workerReady) {
        await this.tokenWorker.connect();
    }

    const pageSize: number = config.get('oai.max.listidentifiers', 100);
    await this.handleLists(oaiRequest, pageSize, browserFingerprint);
}
|
||
|
||
/**
 * Handles the `ListRecords` verb by delegating to `handleLists` with the
 * page size configured under `oai.max.listrecords` (default 100).
 *
 * @param oaiRequest - arguments of the OAI request.
 * @param browserFingerprint - client fingerprint forwarded to the token worker.
 */
protected async handleListRecords(oaiRequest: Dictionary, browserFingerprint: string) {
    // The token worker is needed for resumption tokens; connect on first use.
    const workerReady = this.tokenWorker.isConnected;
    if (!workerReady) {
        await this.tokenWorker.connect();
    }

    const pageSize: number = config.get('oai.max.listrecords', 100);
    await this.handleLists(oaiRequest, pageSize, browserFingerprint);
}
|
||
|
||
/**
 * Shared implementation of ListRecords and ListIdentifiers.
 *
 * Determines the ids of the current page (from a resumption token or from a
 * fresh query), adds one XML record per dataset below the `Datasets` element
 * and stores a resumption token when a following page exists.
 *
 * The repository identifier is read from `OAI_REP_IDENTIFIER`, as in
 * `handleIdentify`, instead of being hard-coded; the default is unchanged.
 *
 * @param oaiRequest - arguments of the OAI request.
 * @param maxRecords - page size.
 * @param browserFingerprint - client fingerprint handed to the token worker.
 * @throws OaiModelException NORECORDSMATCH when the current page is empty.
 */
private async handleLists(oaiRequest: Dictionary, maxRecords: number, browserFingerprint: string) {
    this.xsltParameter['repIdentifier'] = process.env.OAI_REP_IDENTIFIER ?? 'tethys.at';
    const datasetNode = this.xml.root().ele('Datasets');

    const paginationParams: PagingParameter = {
        cursor: 0,
        totalLength: 0,
        // Start position of the page after the first one.
        start: maxRecords + 1,
        nextDocIds: [],
        activeWorkIds: [],
        metadataPrefix: '',
        queryParams: {},
    };

    if ('resumptionToken' in oaiRequest) {
        await this.handleResumptionToken(oaiRequest, maxRecords, paginationParams);
    } else {
        await this.handleNoResumptionToken(oaiRequest, paginationParams, maxRecords);
    }

    const { nextDocIds, activeWorkIds } = paginationParams;
    if (activeWorkIds.length === 0) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The combination of the given values results in an empty list.',
            OaiErrorCodes.NORECORDSMATCH,
        );
    }

    const datasets = await Dataset.query()
        .whereIn('publish_id', activeWorkIds)
        .preload('xmlCache')
        .preload('collections', (builder) => {
            builder.preload('collectionRole');
        })
        .orderBy('publish_id');

    // Records are appended one after another so that the document keeps the publish_id order.
    for (const dataset of datasets) {
        await this.createXmlRecord(dataset, datasetNode);
    }

    await this.setResumptionToken(nextDocIds, paginationParams, browserFingerprint);
}
|
||
|
||
/**
 * Starts a new list request (no resumption token given): validates the
 * metadata prefix, builds the dataset query from the request arguments and
 * stores the first page in `paginationParams`.
 *
 * @param oaiRequest - arguments of the OAI request.
 * @param paginationParams - paging state, updated in place.
 * @param maxRecords - page size.
 */
private async handleNoResumptionToken(oaiRequest: Dictionary, paginationParams: PagingParameter, maxRecords: number) {
    this.validateMetadataPrefix(oaiRequest, paginationParams);

    const deliverableStates = this.deliveringDocumentStates;
    const finder: ModelQueryBuilderContract<typeof Dataset, Dataset> = Dataset.query().whereIn('server_state', deliverableStates);

    // Narrow the query by the optional set / from / until arguments.
    this.applySetFilter(finder, oaiRequest);
    this.applyDateFilters(finder, oaiRequest);

    await this.fetchAndSetResults(finder, paginationParams, oaiRequest, maxRecords);
}
|
||
|
||
/**
 * Runs the prepared query for a fresh list request and stores the result in
 * `paginationParams`: total count, ids of the current page, ids of the
 * following page and the request arguments needed to rebuild the query later.
 *
 * The page boundary now follows `maxRecords`; it used to be a fixed 100 while
 * the query already fetched `maxRecords * 2` rows, which produced wrong pages
 * for any other configured page size.
 *
 * @param finder - dataset query with state/set/date filters applied.
 * @param paginationParams - paging state, updated in place.
 * @param oaiRequest - arguments of the OAI request.
 * @param maxRecords - page size.
 */
private async fetchAndSetResults(
    finder: ModelQueryBuilderContract<typeof Dataset, Dataset>,
    paginationParams: PagingParameter,
    oaiRequest: Dictionary,
    maxRecords: number
) {
    // Count on a clone so that the aggregate does not leak into the id query below.
    const countRow = await finder.clone().count('* as total').first();
    paginationParams.totalLength = Number(countRow?.$extras.total ?? 0);

    // Fetch two pages at once: the current one and the one referenced by the resumption token.
    const combinedRecords: Dataset[] = await finder
        .select('publish_id')
        .orderBy('publish_id')
        .offset(0)
        .limit(maxRecords * 2);
    const publishIds = combinedRecords.map((dat) => Number(dat.publish_id));

    paginationParams.activeWorkIds = publishIds.slice(0, maxRecords);
    paginationParams.nextDocIds = publishIds.slice(maxRecords);

    // No resumption token was used – set queryParams from the current oaiRequest
    paginationParams.queryParams = {
        ...oaiRequest,
        deliveringStates: this.deliveringDocumentStates,
    };
}
|
||
|
||
/**
 * Continues a list request from a stored resumption token: restores the
 * paging state from the token and looks up the ids of the following page.
 *
 * @param oaiRequest - request arguments; `resumptionToken` is read.
 * @param maxRecords - page size.
 * @param paginationParams - paging state, updated in place.
 * @throws OaiModelException BADRESUMPTIONTOKEN when the token worker returns no token.
 */
private async handleResumptionToken(oaiRequest: Dictionary, maxRecords: number, paginationParams: PagingParameter) {
    const token = await this.tokenWorker.get(oaiRequest['resumptionToken']);
    if (!token) {
        throw new OaiModelException(StatusCodes.INTERNAL_SERVER_ERROR, 'cache is outdated.', OaiErrorCodes.BADRESUMPTIONTOKEN);
    }

    const position = token.startPosition;
    paginationParams.cursor = position - 1;
    paginationParams.start = position + maxRecords;
    paginationParams.totalLength = token.totalIds;
    // The ids stored in the token are the ones delivered by this response.
    paginationParams.activeWorkIds = token.documentIds;
    paginationParams.metadataPrefix = token.metadataPrefix;
    paginationParams.queryParams = token.queryParams;
    this.xsltParameter['oai_metadataPrefix'] = token.metadataPrefix;

    // Rebuild the original query to determine the ids of the page after this one.
    const query = this.buildDatasetQueryViaToken(token);
    const upcoming: Dataset[] = await this.fetchNextRecords(query, token, maxRecords);
    paginationParams.nextDocIds = upcoming.map((dat) => Number(dat.publish_id));
}
|
||
|
||
/**
 * Stores a resumption token for the following page and publishes it to the
 * stylesheet. Nothing happens when there is no following page.
 *
 * @param nextIds - publish_ids of the following page.
 * @param paginationParams - paging state of the current response.
 * @param browserFingerprint - client fingerprint handed to the token worker.
 */
private async setResumptionToken(nextIds: number[], paginationParams: PagingParameter, browserFingerprint: string) {
    if (nextIds.length === 0) {
        return;
    }

    const { start, totalLength, metadataPrefix, queryParams, cursor } = paginationParams;

    const token = new ResumptionToken();
    token.startPosition = start;
    token.totalIds = totalLength;
    token.documentIds = nextIds;
    token.metadataPrefix = metadataPrefix;
    token.queryParams = queryParams;

    const tokenKey: string = await this.tokenWorker.set(token, browserFingerprint);
    this.setParamResumption(tokenKey, cursor, totalLength);
}
|
||
|
||
/**
 * Rebuilds the dataset query of the original list request from the
 * arguments stored in a resumption token.
 *
 * @param token - resumption token holding the original query parameters.
 * @returns Query with the state, set and date filters applied.
 */
private buildDatasetQueryViaToken(token: ResumptionToken) {
    const storedQuery = token.queryParams || {};
    // Fall back to the controller defaults when the token carries no states.
    const states = storedQuery.deliveringStates || this.deliveringDocumentStates;

    const query = Dataset.query();
    query.whereIn('server_state', states);
    this.applySetFilter(query, storedQuery);
    this.applyDateFilters(query, storedQuery);
    return query;
}
|
||
|
||
/**
 * Loads the publish_ids of the page that follows the one delivered for `token`.
 *
 * The row limit now follows `maxRecords`; it used to be a fixed 100 although
 * the offset is computed from `maxRecords`, so pages were skipped or
 * overlapped for any other configured page size.
 *
 * @param finder - rebuilt dataset query of the original request.
 * @param token - resumption token of the current page.
 * @param maxRecords - page size.
 * @returns Datasets (only `publish_id` selected) of the following page.
 */
private async fetchNextRecords(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, token: ResumptionToken, maxRecords: number) {
    // startPosition is 1-based; skip everything before the current page plus the current page itself.
    const offset = token.startPosition - 1 + maxRecords;
    return finder.select('publish_id').orderBy('publish_id').offset(offset).limit(maxRecords);
}
|
||
|
||
/**
 * Requires the `metadataPrefix` argument and copies it into the paging
 * state and the stylesheet parameters.
 *
 * @param oaiRequest - arguments of the OAI request.
 * @param paginationParams - paging state, updated in place.
 * @throws OaiModelException BADARGUMENT when `metadataPrefix` is missing.
 */
private validateMetadataPrefix(oaiRequest: Dictionary, paginationParams: PagingParameter) {
    const prefixGiven = 'metadataPrefix' in oaiRequest;
    if (!prefixGiven) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The prefix of the metadata argument is unknown.',
            OaiErrorCodes.BADARGUMENT,
        );
    }

    const prefix = oaiRequest['metadataPrefix'];
    paginationParams.metadataPrefix = prefix;
    this.xsltParameter['oai_metadataPrefix'] = prefix;
}
|
||
|
||
/**
 * Restricts the query to the requested OAI set, if any. The set argument is
 * split at `:` into a type and a value; unknown set types leave the query
 * unchanged.
 *
 * @param finder - dataset query, modified in place.
 * @param queryParams - request arguments (or the ones stored in a token).
 */
private applySetFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, queryParams: any) {
    if (!('set' in queryParams)) {
        return;
    }

    const [setType, setValue] = queryParams['set'].split(':');

    if (setType === 'data-type') {
        // data-type:<type> filters on the dataset type column.
        if (setValue) {
            finder.where('type', setValue);
        }
    } else if (setType === 'open_access') {
        finder.andWhereHas('licenses', (query) => {
            query.whereIn('name', ['CC-BY-4.0', 'CC-BY-SA-4.0']);
        });
    } else if (setType === 'ddc') {
        // ddc:<number> filters on the number of an assigned collection.
        if (setValue) {
            finder.andWhereHas('collections', (query) => {
                query.where('number', setValue);
            });
        }
    }
}
|
||
|
||
/**
 * Applies the optional `from` / `until` arguments to the query by
 * delegating to the matching filter helper.
 *
 * @param finder - dataset query, modified in place.
 * @param queryParams - request arguments (or the ones stored in a token).
 */
private applyDateFilters(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, queryParams: any) {
    const from = queryParams.from;
    const until = queryParams.until;

    if (from && until) {
        this.handleFromUntilFilter(finder, from, until);
        return;
    }
    if (from) {
        this.handleFromFilter(finder, from);
        return;
    }
    if (until) {
        this.handleUntilFilter(finder, until);
    }
}
|
||
|
||
/**
 * Restricts the query to datasets published between `from` and `until`.
 *
 * @param finder - dataset query, modified in place.
 * @param from - lower bound as given in the request.
 * @param until - upper bound as given in the request.
 * @throws OaiModelException BADARGUMENT when a date is invalid or both
 *         arguments differ in length (taken as different granularities).
 */
private handleFromUntilFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, from: string, until: string) {
    const lowerBound = this.parseDateWithValidation(from, 'From');
    const upperBound = this.parseDateWithValidation(until, 'Until');

    const sameGranularity = from.length === until.length;
    if (!sameGranularity) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The request has different granularities for the from and until parameters.',
            OaiErrorCodes.BADARGUMENT,
        );
    }

    const sqlFormat = 'YYYY-MM-DD HH:mm:ss';
    finder.whereBetween('server_date_published', [lowerBound.format(sqlFormat), upperBound.format(sqlFormat)]);
}
|
||
|
||
/**
 * Restricts the query to datasets published at or after `from`.
 *
 * @param finder - dataset query, modified in place.
 * @param from - lower bound as given in the request.
 * @throws OaiModelException BADARGUMENT when the date is invalid;
 *         NORECORDSMATCH when it lies in the future.
 */
private handleFromFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, from: string) {
    const lowerBound = this.parseDateWithValidation(from, 'From');

    // A lower bound in the future can never match anything.
    const liesInFuture = lowerBound.isAfter(dayjs());
    if (liesInFuture) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'Given from date is greater than now. The given values results in an empty list.',
            OaiErrorCodes.NORECORDSMATCH,
        );
    }

    const sqlFrom = lowerBound.format('YYYY-MM-DD HH:mm:ss');
    finder.andWhere('server_date_published', '>=', sqlFrom);
}
|
||
|
||
/**
 * Restricts the query to datasets published at or before `until`.
 *
 * @param finder - dataset query, modified in place.
 * @param until - upper bound as given in the request.
 * @throws OaiModelException BADARGUMENT when the date is invalid;
 *         NORECORDSMATCH when the earliest publication date lies after it.
 */
private handleUntilFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, until: string) {
    const upperBound = this.parseDateWithValidation(until, 'Until');

    // Compare against the earliest published dataset that `index` looked up.
    const earliestIso = this.firstPublishedDataset?.server_date_published.toISO();
    const earliestPublication = dayjs(earliestIso);

    if (earliestPublication.isAfter(upperBound)) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'earliestDatestamp is greater than given until date. The given values results in an empty list.',
            OaiErrorCodes.NORECORDSMATCH,
        );
    }

    const sqlUntil = upperBound.format('YYYY-MM-DD HH:mm:ss');
    finder.andWhere('server_date_published', '<=', sqlUntil);
}
|
||
|
||
/**
 * Parses a `from` / `until` argument and interprets it in the
 * `Europe/Vienna` time zone.
 *
 * An argument without a time part is widened to the start of the day for
 * `From` and to the end of the day for any other label. Whether a time part
 * is present is now read from the string itself; the former `hour() === 0`
 * test also widened explicit times in the 00:xx hour, e.g. an `until` of
 * `...T00:00:00Z` was turned into the end of that day.
 *
 * @param dateStr - date argument as given in the request.
 * @param label - `'From'` or `'Until'`; used in the error message and for widening.
 * @returns The parsed (and possibly widened) date.
 * @throws OaiModelException BADARGUMENT when dayjs cannot parse the value.
 */
private parseDateWithValidation(dateStr: string, label: string) {
    if (!dayjs(dateStr).isValid()) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `${label} date parameter is not valid.`,
            OaiErrorCodes.BADARGUMENT,
        );
    }

    const date = dayjs.tz(dateStr, 'Europe/Vienna');

    // A time part is either separated from the date by "T"/whitespace or contains a colon.
    const hasTimePart = /\d[Tt\s]+\d|:/.test(String(dateStr).trim());
    if (hasTimePart) {
        return date;
    }
    return label === 'From' ? date.startOf('day') : date.endOf('day');
}
|
||
|
||
/**
 * Publishes the resumption token data to the stylesheet.
 *
 * The `dateDelete` value is formatted with `HH` (24-hour clock), matching
 * `responseDate`; the former `hh` token is the 12-hour clock and produced
 * e.g. `03:00:00` for 15:00.
 *
 * @param res - key of the stored resumption token.
 * @param cursor - cursor of the current page.
 * @param totalIds - total number of matching datasets.
 */
private setParamResumption(res: string, cursor: number, totalIds: number) {
    // NOTE(review): like responseDate, this is local time followed by a literal "Z" — confirm intended.
    const tomorrow = dayjs().add(1, 'day').format('YYYY-MM-DDTHH:mm:ss[Z]');
    this.xsltParameter['dateDelete'] = tomorrow;
    this.xsltParameter['res'] = res;
    this.xsltParameter['cursor'] = cursor;
    this.xsltParameter['totalIds'] = totalIds;
}
|
||
|
||
/**
 * Extracts the numeric dataset id from the `identifier` argument, which is
 * expected in the form `oai:<repository>:<id>`.
 *
 * The id part must consist of decimal digits only. The former `Number()`
 * conversion also accepted values such as `0x10`, `1e3`, an empty string or
 * surrounding whitespace, and a non-string identifier caused a TypeError.
 *
 * @param oaiRequest - arguments of the OAI request.
 * @returns The dataset id (matched against `publish_id` by the caller).
 * @throws BadOaiModelException when `identifier` is missing.
 * @throws OaiModelException BADARGUMENT when the id part is not a decimal number.
 */
private validateAndGetIdentifier(oaiRequest: Dictionary): number {
    // Identifier references metadata Urn, not plain Id!
    // Currently implemented as 'oai:foo.bar.de:{docId}' or 'urn:nbn...-123'
    if (!('identifier' in oaiRequest)) {
        throw new BadOaiModelException('The prefix of the identifier argument is unknown.');
    }

    const identifier: unknown = oaiRequest.identifier;
    const idPart = typeof identifier === 'string' ? this.getDocumentIdByIdentifier(identifier) : undefined;

    if (typeof idPart !== 'string' || !/^\d+$/.test(idPart)) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The value of the identifier argument is illegal in this repository.',
            OaiErrorCodes.BADARGUMENT,
        );
    }
    return Number(idPart);
}
|
||
|
||
/**
 * Returns the `metadataPrefix` argument of the request.
 *
 * @param oaiRequest - arguments of the OAI request.
 * @returns The value of `metadataPrefix`.
 * @throws OaiModelException BADARGUMENT when the argument is missing.
 */
private validateAndGetMetadataPrefix(oaiRequest: Dictionary): string {
    if (!('metadataPrefix' in oaiRequest)) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The prefix of the metadata argument is unknown.',
            OaiErrorCodes.BADARGUMENT,
        );
    }
    return oaiRequest['metadataPrefix'];
}
|
||
|
||
/**
 * Ensures that the dataset is in one of the deliverable server states.
 *
 * @param dataset - dataset to check.
 * @throws OaiModelException NORECORDSMATCH when the state is missing or not deliverable.
 */
private validateDatasetState(dataset: Dataset): void {
    const state = dataset.server_state;
    const deliverable = state != null && this.deliveringDocumentStates.includes(state);
    if (deliverable) {
        return;
    }

    throw new OaiModelException(
        StatusCodes.INTERNAL_SERVER_ERROR,
        'Document is not available for OAI export!',
        OaiErrorCodes.NORECORDSMATCH,
    );
}
|
||
|
||
/**
 * Builds the XML node of one dataset, enriches it with the landing page
 * attribute and its set memberships and imports it into `datasetNode`.
 * Nothing is added when no XML node can be obtained for the dataset.
 *
 * Collections without a collection role or without a number are skipped:
 * the former code threw a TypeError on a missing role and produced set
 * specs such as `ddc:null` for a missing number. `getSetsForCollections`
 * applies the same two conditions when listing sets.
 *
 * @param dataset - dataset with `xmlCache` and `collections.collectionRole` preloaded.
 * @param datasetNode - parent node receiving the record.
 */
private async createXmlRecord(dataset: Dataset, datasetNode: XMLBuilder) {
    const domNode = await this.getDatasetXmlDomNode(dataset);
    if (!domNode) {
        return;
    }

    // add frontdoor url and data-type
    if (dataset.publish_id) {
        this.addLandingPageAttribute(domNode, dataset.publish_id.toString());
    }
    this.addSpecInformation(domNode, 'data-type:' + dataset.type);

    for (const coll of dataset.collections ?? []) {
        const collRole = coll.collectionRole;
        if (!collRole || !coll.number) {
            continue;
        }
        this.addSpecInformation(domNode, collRole.oai_name + ':' + coll.number);
    }

    datasetNode.import(domNode);
}
|
||
|
||
/**
 * Obtains the XML representation of a dataset through `XmlModel`, with
 * caching enabled and empty fields excluded.
 *
 * @param dataset - dataset to serialize; a preloaded `xmlCache` is handed to the model.
 * @returns The XML node, or null when the model delivers none.
 */
private async getDatasetXmlDomNode(dataset: Dataset) {
    const model = new XmlModel(dataset);
    model.excludeEmptyFields();
    model.caching = true;

    const cachedXml = dataset.xmlCache;
    if (cachedXml) {
        model.xmlCache = cachedXml;
    }

    const domDocument: XMLBuilder | null = await model.getDomDocument();
    return domDocument;
}
|
||
|
||
/**
 * Appends a `SetSpec` child element carrying the set membership in its
 * `Value` attribute.
 *
 * @param domNode - dataset node to extend.
 * @param information - set spec, e.g. `data-type:<type>`.
 */
private addSpecInformation(domNode: XMLBuilder, information: string) {
    const specNode = domNode.ele('SetSpec');
    specNode.att('Value', information);
}
|
||
|
||
/**
 * Adds the `landingpage` attribute (URL of the dataset page) to the dataset node.
 * The host is derived from `OAI_BASE_DOMAIN` via `getDomain`, default `localhost`.
 *
 * @param domNode - dataset node to extend.
 * @param dataid - publish id of the dataset.
 */
private addLandingPageAttribute(domNode: XMLBuilder, dataid: string) {
    const domain = getDomain(process.env.OAI_BASE_DOMAIN || 'localhost');
    // add attribute to dataset xml element
    domNode.att('landingpage', 'https://' + domain + '/dataset/' + dataid);
}
|
||
|
||
/**
 * Returns the third `:`-separated part of an OAI identifier
 * (`oai:<repository>:<id>`). The prefix is not checked and the id part is
 * returned unvalidated; it is undefined at runtime when the identifier has
 * fewer than three parts.
 *
 * @param oaiIdentifier - identifier argument of the request.
 * @returns The id part of the identifier.
 */
private getDocumentIdByIdentifier(oaiIdentifier: string): string {
    const [, , dataId] = oaiIdentifier.split(':');
    return dataId;
}
|
||
|
||
/**
 * Builds the set table for collections whose collection role is visible
 * via OAI. Only collections with a number are listed.
 *
 * @returns Map from `<oai_name>:<number>` to a display name.
 */
private async getSetsForCollections(): Promise<Dictionary> {
    const collections = await Collection.query()
        .select('name', 'number', 'role_id')
        .whereHas('collectionRole', (query) => {
            query.where('visible_oai', true);
        })
        .preload('collectionRole');

    const sets: Dictionary = {};
    for (const collection of collections) {
        // if collection has a collection role (classification like ddc):
        if (!collection.number) {
            continue;
        }
        const setSpec = collection.collectionRole?.oai_name + ':' + collection.number;
        sets[setSpec] = `Set ${collection.number} '${collection.name}'`;
    }
    return sets;
}
|
||
|
||
/**
 * Builds the set table for the types of all published datasets.
 *
 * Each distinct type is handled once, so an invalid type is logged a single
 * time instead of once per dataset. Datasets without a type are ignored;
 * they used to produce a `data-type:null` set.
 *
 * @returns Map from `data-type:<type>` to a display name.
 */
private async getSetsForDatasetTypes(): Promise<Dictionary> {
    const sets: Dictionary = {};

    const datasets: Array<Dataset> = await Dataset.query().select('type').where('server_state', 'published');
    const distinctTypes = new Set(datasets.map((dataset) => dataset.type));

    for (const type of distinctTypes) {
        if (!type) {
            continue;
        }
        if (false == preg_match(this.sampleRegEx, type)) {
            // A type with other characters cannot be used inside a setSpec; report and skip it.
            const msg = `Invalid SetSpec (data-type='${type}'). Allowed characters are [${this.sampleRegEx}].`;
            logger.error(`OAI-PMH: ${msg}`);
            continue;
        }
        sets['data-type:' + type] = `Set for document type '${type}'`;
    }
    return sets;
}
|
||
|
||
/**
 * Reports an unsupported verb to the stylesheet as a `badVerb` error.
 */
private handleIllegalVerb() {
    Object.assign(this.xsltParameter, {
        oai_error_code: 'badVerb',
        oai_error_message: 'The verb provided in the request is illegal.',
    });
}
|
||
|
||
/**
 * Derives a fingerprint string for the requesting client from
 * - the User-Agent header (or `unknown`),
 * - the client IP: first entry of X-Forwarded-For when that header is
 *   present, otherwise `request.ip()`,
 * - the Accept-Language header (or `default`),
 * - the current time truncated to the hour.
 *
 * Because of the time component the fingerprint changes every hour.
 *
 * @param request - current HTTP request.
 * @returns The four parts joined by `-`.
 */
private getBrowserFingerprint(request: Request): string {
    const userAgent = request.header('user-agent') || 'unknown';
    const locale = request.header('accept-language') || 'default';

    // Behind a proxy the header holds a comma-separated list; the first entry is the client.
    const directIp = request.ip();
    const forwardedFor = request.header('x-forwarded-for');
    const ip = forwardedFor ? forwardedFor.split(',')[0].trim() : directIp;

    // Round the current time to the start of the hour.
    const timestampHour = dayjs().startOf('hour').format('YYYY-MM-DDTHH');

    return `${userAgent}-${ip}-${locale}-${timestampHour}`;
}
|
||
}
|