tethys.backend/app/Controllers/Http/Oai/OaiController.ts
Arno Kaimbacher b540547e4c
All checks were successful
CI / container-job (push) Successful in 49s
feat: update API controllers, validations, and Vue components
- Modified Api/Authors.Controller.ts to use only personal types and sort by dataset_count.
- Completely rewritten AvatarController.ts.
- Added new Api/CollectionsController.ts for querying collections and collection_roles.
- Modified Api/DatasetController.ts to preload titles, identifier and order by server_date_published.
- Modified FileController.ts to serve files from /storage/app/data/ instead of /storage/app/public.
- Added new Api/UserController for requesting submitters (getSubmitters).
- Improved OaiController.ts with performant DB queries for better ResumptionToken handling.
- Modified Submitter/DatasetController.ts by adding a categorize method for library classification.
- Rewritten ResumptionToken.ts.
- Improved TokenWorkerService.ts to utilize browser fingerprint.
- Edited dataset.ts by adding the doiIdentifier property.
- Enhanced person.ts to improve the fullName property.
- Completely rewritten AsideMenuItem.vue component.
- Updated CarBoxClient.vue to use TypeScript.
- Added new CardBoxDataset.vue for displaying recent datasets on the dashboard.
- Completely rewritten TableSampleClients.vue for the dashboard.
- Completely rewritten UserAvatar.vue.
- Made small layout changes in Dashboard.vue.
- Added new Category.vue for browsing scientific collections.
- Adapted the pinia store in main.ts.
- Added additional routes in start/routes.ts and start/api/routes.ts.
- Improved referenceValidation.ts for better ISBN existence checking.
- NPM dependency updates.
2025-03-14 17:39:58 +01:00

727 lines
30 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type { HttpContext } from '@adonisjs/core/http';
// import { RequestContract } from '@ioc:Adonis/Core/Request';
import { Request } from '@adonisjs/core/http';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import { create } from 'xmlbuilder2';
import dayjs, { Dayjs } from 'dayjs';
import utc from 'dayjs/plugin/utc.js';
import timezone from 'dayjs/plugin/timezone.js';
import { readFileSync } from 'fs';
import { StatusCodes } from 'http-status-codes';
import SaxonJS from 'saxon-js';
// import { Xslt, xmlParse } from 'xslt-processor'
import { OaiErrorCodes, OaiModelError } from '#app/exceptions/OaiErrorCodes';
import { OaiModelException, BadOaiModelException } from '#app/exceptions/OaiModelException';
import Dataset from '#models/dataset';
import Collection from '#models/collection';
import { getDomain, preg_match } from '#app/utils/utility-functions';
import XmlModel from '#app/Library/XmlModel';
import logger from '@adonisjs/core/services/logger';
import ResumptionToken from '#app/Library/Oai/ResumptionToken';
// import Config from '@ioc:Adonis/Core/Config';
import config from '@adonisjs/core/services/config';
// import { inject } from '@adonisjs/fold';
import { inject } from '@adonisjs/core';
// import { TokenWorkerContract } from "MyApp/Models/TokenWorker";
import TokenWorkerContract from '#library/Oai/TokenWorkerContract';
import { ModelQueryBuilderContract } from '@adonisjs/lucid/types/model';
interface XslTParameter {
[key: string]: any;
}
interface Dictionary {
[index: string]: string;
}
interface PagingParameter {
cursor: number;
totalLength: number;
start: number;
nextDocIds: number[];
activeWorkIds: number[];
metadataPrefix: string;
queryParams: Object;
}
@inject()
export default class OaiController {
private deliveringDocumentStates = ['published', 'deleted'];
private sampleRegEx = /^[A-Za-zäüÄÜß0-9\-_.!~]+$/;
private xsltParameter: XslTParameter;
private firstPublishedDataset: Dataset | null;
/**
* Holds xml representation of document information to be processed.
*
* @var xmlbuilder.XMLDocument | null Defaults to null.
*/
private xml: XMLBuilder;
private proc;
constructor(public tokenWorker: TokenWorkerContract) {
// Load the XSLT file
this.proc = readFileSync('public/assets2/datasetxml2oai.sef.json');
dayjs.extend(utc);
dayjs.extend(timezone);
}
public async index({ response, request }: HttpContext): Promise<void> {
this.xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
// this.proc = new XSLTProcessor();
// const stylesheet = readFileSync(__dirname + "/datasetxml2oai.sef.json");
const xsltParameter: XslTParameter = (this.xsltParameter = {});
let oaiRequest: Dictionary = {};
if (request.method() === 'POST') {
oaiRequest = request.body();
} else if (request.method() === 'GET') {
oaiRequest = request.qs();
} else {
xsltParameter['oai_error_code'] = 'unknown';
xsltParameter['oai_error_message'] = 'Only POST and GET methods are allowed for OAI-PMH.';
}
let earliestDateFromDb;
// const oaiRequest: OaiParameter = request.body;
try {
this.firstPublishedDataset = await Dataset.earliestPublicationDate();
this.firstPublishedDataset != null &&
(earliestDateFromDb = this.firstPublishedDataset.server_date_published.toFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"));
this.xsltParameter['earliestDatestamp'] = earliestDateFromDb;
// start the request
await this.handleRequest(oaiRequest, request);
} catch (error) {
if (error instanceof OaiModelException) {
const code = error.oaiCode;
let oaiErrorCode: string | undefined = 'Unknown oai error code ' + code;
if (OaiModelError.has(error.oaiCode) && OaiModelError.get(code) !== undefined) {
oaiErrorCode = OaiModelError.get(error.oaiCode);
}
this.xsltParameter['oai_error_code'] = oaiErrorCode;
this.xsltParameter['oai_error_message'] = error.message;
} else {
// // return next(error); // passing to default express middleware error handler
this.xsltParameter['oai_error_code'] = 'unknown';
this.xsltParameter['oai_error_message'] = 'An internal error occured.';
}
}
const xmlString = this.xml.end({ prettyPrint: true });
let xmlOutput; // = xmlString;
try {
const result = await SaxonJS.transform({
// stylesheetFileName: `${config.TMP_BASE_DIR}/data-quality/rules/iati.sef.json`,
stylesheetText: this.proc,
destination: 'serialized',
// sourceFileName: sourceFile,
sourceText: xmlString,
stylesheetParams: xsltParameter,
// logLevel: 10,
});
xmlOutput = result.principalResult;
} catch (error) {
return response.status(500).json({
message: 'An error occurred while creating the user',
error: error.message,
});
}
response
.header('Content-Type', 'application/xml; charset=utf-8')
.header('Access-Control-Allow-Origin', '*')
.header('Access-Control-Allow-Methods', 'GET,POST');
response.status(StatusCodes.OK).send(xmlOutput);
}
protected async handleRequest(oaiRequest: Dictionary, request: Request) {
// Setup stylesheet
// $this->loadStyleSheet('datasetxml2oai-pmh.xslt');
// Set response time
const now: Dayjs = dayjs();
this.xsltParameter['responseDate'] = now.format('YYYY-MM-DDTHH:mm:ss[Z]');
this.xsltParameter['unixTimestamp'] = now.unix();
// set OAI base url
const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
this.xsltParameter['baseURL'] = baseDomain + '/oai';
this.xsltParameter['repURL'] = request.protocol() + '://' + request.hostname();
this.xsltParameter['downloadLink'] = request.protocol() + '://' + request.hostname() + '/file/download/';
this.xsltParameter['doiLink'] = 'https://doi.org/';
this.xsltParameter['doiPrefix'] = 'info:eu-repo/semantics/altIdentifier/doi/';
if (oaiRequest['verb']) {
const verb = oaiRequest['verb'];
this.xsltParameter['oai_verb'] = verb;
if (verb === 'Identify') {
this.handleIdentify();
} else if (verb === 'ListMetadataFormats') {
this.handleListMetadataFormats();
} else if (verb == 'GetRecord') {
await this.handleGetRecord(oaiRequest);
} else if (verb == 'ListRecords') {
// Get browser fingerprint from the request:
const browserFingerprint = this.getBrowserFingerprint(request);
await this.handleListRecords(oaiRequest, browserFingerprint);
} else if (verb == 'ListIdentifiers') {
// Get browser fingerprint from the request:
const browserFingerprint = this.getBrowserFingerprint(request);
await this.handleListIdentifiers(oaiRequest, browserFingerprint);
} else if (verb == 'ListSets') {
await this.handleListSets();
} else {
this.handleIllegalVerb();
}
} else {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'The verb provided in the request is illegal.',
OaiErrorCodes.BADVERB,
);
}
}
protected handleIdentify() {
// Get configuration values from environment or a dedicated configuration service
const email = process.env.OAI_EMAIL ?? 'repository@geosphere.at';
const repositoryName = process.env.OAI_REPOSITORY_NAME ?? 'Tethys RDR';
const repIdentifier = process.env.OAI_REP_IDENTIFIER ?? 'tethys.at';
const sampleIdentifier = `oai:${repIdentifier}:1`;
// Dataset::earliestPublicationDate()->server_date_published->format('Y-m-d\TH:i:s\Z') : null;
// earliestDateFromDb!= null && (this.xsltParameter['earliestDatestamp'] = earliestDateFromDb?.server_date_published);
// set parameters for oai-pmh.xslt
this.xsltParameter['email'] = email;
this.xsltParameter['repositoryName'] = repositoryName;
this.xsltParameter['repIdentifier'] = repIdentifier;
this.xsltParameter['sampleIdentifier'] = sampleIdentifier;
// $this->proc->setParameter('', 'earliestDatestamp', $earliestDateFromDb);
this.xml.root().ele('Datasets');
}
protected handleListMetadataFormats() {
this.xml.root().ele('Datasets');
}
protected async handleListSets() {
const repIdentifier = 'tethys.at';
this.xsltParameter['repIdentifier'] = repIdentifier;
const datasetElement = this.xml.root().ele('Datasets');
const sets: { [key: string]: string } = {
'open_access': 'Set for open access licenses',
'openaire_data': 'OpenAIRE',
'doc-type:ResearchData': 'Set for document type ResearchData',
...(await this.getSetsForDatasetTypes()),
...(await this.getSetsForCollections()),
// ... await this.getSetsForProjects(),
} as Dictionary;
for (const [key, value] of Object.entries(sets)) {
const setElement = datasetElement.ele('Rdr_Sets');
setElement.att('Type', key);
setElement.att('TypeName', value);
}
}
protected async handleGetRecord(oaiRequest: Dictionary) {
const repIdentifier = 'tethys.at';
this.xsltParameter['repIdentifier'] = repIdentifier;
// Validate that required parameter exists early
if (!('identifier' in oaiRequest)) {
throw new BadOaiModelException('The prefix of the identifier argument is unknown.');
}
// Validate and extract the dataset identifier from the request
const dataId = this.validateAndGetIdentifier(oaiRequest);
// Retrieve dataset with associated XML cache and collection roles
const dataset = await Dataset.query()
.where('publish_id', dataId)
.preload('xmlCache')
.preload('collections', (builder) => {
builder.preload('collectionRole');
})
.first();
if (!dataset || !dataset.publish_id) {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'The value of the identifier argument is unknown or illegal in this repository.',
OaiErrorCodes.IDDOESNOTEXIST,
);
}
// Validate and set the metadata prefix parameter
const metadataPrefix = this.validateAndGetMetadataPrefix(oaiRequest);
this.xsltParameter['oai_metadataPrefix'] = metadataPrefix;
// Ensure that the dataset is in an exportable state
this.validateDatasetState(dataset);
// Build the XML for the dataset record and add it to the root node
const datasetNode = this.xml.root().ele('Datasets');
await this.createXmlRecord(dataset, datasetNode);
}
protected async handleListIdentifiers(oaiRequest: Dictionary, browserFingerprint: string) {
if (!this.tokenWorker.isConnected) {
await this.tokenWorker.connect();
}
const maxIdentifier: number = config.get('oai.max.listidentifiers', 100);
await this.handleLists(oaiRequest, maxIdentifier, browserFingerprint);
}
protected async handleListRecords(oaiRequest: Dictionary, browserFingerprint: string) {
if (!this.tokenWorker.isConnected) {
await this.tokenWorker.connect();
}
const maxRecords: number = config.get('oai.max.listrecords', 100);
await this.handleLists(oaiRequest, maxRecords, browserFingerprint);
}
private async handleLists(oaiRequest: Dictionary, maxRecords: number, browserFingerprint: string) {
const repIdentifier = 'tethys.at';
this.xsltParameter['repIdentifier'] = repIdentifier;
const datasetNode = this.xml.root().ele('Datasets');
const paginationParams: PagingParameter ={
cursor: 0,
totalLength: 0,
start: maxRecords + 1,
nextDocIds: [],
activeWorkIds: [],
metadataPrefix: '',
queryParams: {},
};
if ('resumptionToken' in oaiRequest) {
await this.handleResumptionToken(oaiRequest, maxRecords, paginationParams);
} else {
await this.handleNoResumptionToken(oaiRequest, paginationParams, maxRecords);
}
const nextIds: number[] = paginationParams.nextDocIds;
const workIds: number[] = paginationParams.activeWorkIds;
if (workIds.length === 0) {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'The combination of the given values results in an empty list.',
OaiErrorCodes.NORECORDSMATCH,
);
}
const datasets = await Dataset.query()
.whereIn('publish_id', workIds)
.preload('xmlCache')
.preload('collections', (builder) => {
builder.preload('collectionRole');
})
.orderBy('publish_id');
for (const dataset of datasets) {
await this.createXmlRecord(dataset, datasetNode);
}
await this.setResumptionToken(nextIds, paginationParams, browserFingerprint);
}
private async handleNoResumptionToken(oaiRequest: Dictionary, paginationParams: PagingParameter, maxRecords: number) {
this.validateMetadataPrefix(oaiRequest, paginationParams);
const finder: ModelQueryBuilderContract<typeof Dataset, Dataset> = Dataset.query().whereIn(
'server_state',
this.deliveringDocumentStates,
);
this.applySetFilter(finder, oaiRequest);
this.applyDateFilters(finder, oaiRequest);
await this.fetchAndSetResults(finder, paginationParams, oaiRequest, maxRecords);
}
private async fetchAndSetResults(
finder: ModelQueryBuilderContract<typeof Dataset, Dataset>,
paginationParams: PagingParameter,
oaiRequest: Dictionary,
maxRecords: number
) {
const totalResult = await finder
.clone()
.count('* as total')
.first()
.then((res) => res?.$extras.total);
paginationParams.totalLength = Number(totalResult);
const combinedRecords: Dataset[] = await finder.select('publish_id').orderBy('publish_id').offset(0).limit(maxRecords*2);
paginationParams.activeWorkIds = combinedRecords.slice(0, 100).map((dat) => Number(dat.publish_id));
paginationParams.nextDocIds = combinedRecords.slice(100).map((dat) => Number(dat.publish_id));
// No resumption token was used set queryParams from the current oaiRequest
paginationParams.queryParams = {
...oaiRequest,
deliveringStates: this.deliveringDocumentStates,
};
// paginationParams.totalLength = 230;
}
private async handleResumptionToken(oaiRequest: Dictionary, maxRecords: number, paginationParams: PagingParameter) {
const resParam = oaiRequest['resumptionToken'];
const token = await this.tokenWorker.get(resParam);
if (!token) {
throw new OaiModelException(StatusCodes.INTERNAL_SERVER_ERROR, 'cache is outdated.', OaiErrorCodes.BADRESUMPTIONTOKEN);
}
// this.setResumptionParameters(token, maxRecords, paginationParams);
paginationParams.cursor = token.startPosition - 1;
paginationParams.start = token.startPosition + maxRecords;
paginationParams.totalLength = token.totalIds;
paginationParams.activeWorkIds = token.documentIds;
paginationParams.metadataPrefix = token.metadataPrefix;
paginationParams.queryParams = token.queryParams;
this.xsltParameter['oai_metadataPrefix'] = token.metadataPrefix;
const finder = this.buildDatasetQueryViaToken(token);
const nextRecords: Dataset[] = await this.fetchNextRecords(finder, token, maxRecords);
paginationParams.nextDocIds = nextRecords.map((dat) => Number(dat.publish_id));
}
private async setResumptionToken(nextIds: number[], paginationParams: PagingParameter, browserFingerprint: string) {
const countRestIds = nextIds.length;
if (countRestIds > 0) {
// const token = this.createResumptionToken(paginationParams, nextIds);
const token = new ResumptionToken();
token.startPosition = paginationParams.start;
token.totalIds = paginationParams.totalLength;
token.documentIds = nextIds;
token.metadataPrefix = paginationParams.metadataPrefix;
token.queryParams = paginationParams.queryParams;
const res: string = await this.tokenWorker.set(token, browserFingerprint);
this.setParamResumption(res, paginationParams.cursor, paginationParams.totalLength);
}
}
private buildDatasetQueryViaToken(token: ResumptionToken) {
const finder = Dataset.query();
const originalQuery = token.queryParams || {};
const deliveringStates = originalQuery.deliveringStates || this.deliveringDocumentStates;
finder.whereIn('server_state', deliveringStates);
this.applySetFilter(finder, originalQuery);
this.applyDateFilters(finder, originalQuery);
return finder;
}
private async fetchNextRecords(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, token: ResumptionToken, maxRecords: number) {
return finder
.select('publish_id')
.orderBy('publish_id')
.offset(token.startPosition - 1 + maxRecords)
.limit(100);
}
private validateMetadataPrefix(oaiRequest: Dictionary, paginationParams: PagingParameter) {
if (!('metadataPrefix' in oaiRequest)) {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'The prefix of the metadata argument is unknown.',
OaiErrorCodes.BADARGUMENT,
);
}
paginationParams.metadataPrefix = oaiRequest['metadataPrefix'];
this.xsltParameter['oai_metadataPrefix'] = paginationParams.metadataPrefix;
}
private applySetFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, queryParams: any) {
if ('set' in queryParams) {
const [setType, setValue] = queryParams['set'].split(':');
switch (setType) {
case 'data-type':
setValue && finder.where('type', setValue);
break;
case 'open_access':
finder.andWhereHas('licenses', (query) => {
query.whereIn('name', ['CC-BY-4.0', 'CC-BY-SA-4.0']);
});
break;
case 'ddc':
setValue &&
finder.andWhereHas('collections', (query) => {
query.where('number', setValue);
});
break;
}
}
}
private applyDateFilters(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, queryParams: any) {
const { from, until } = queryParams;
if (from && until) {
this.handleFromUntilFilter(finder, from, until);
} else if (from) {
this.handleFromFilter(finder, from);
} else if (until) {
this.handleUntilFilter(finder, until);
}
}
private handleFromUntilFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, from: string, until: string) {
const fromDate = this.parseDateWithValidation(from, 'From');
const untilDate = this.parseDateWithValidation(until, 'Until');
if (from.length !== until.length) {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'The request has different granularities for the from and until parameters.',
OaiErrorCodes.BADARGUMENT,
);
}
finder.whereBetween('server_date_published', [fromDate.format('YYYY-MM-DD HH:mm:ss'), untilDate.format('YYYY-MM-DD HH:mm:ss')]);
}
private handleFromFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, from: string) {
const fromDate = this.parseDateWithValidation(from, 'From');
const now = dayjs();
if (fromDate.isAfter(now)) {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'Given from date is greater than now. The given values results in an empty list.',
OaiErrorCodes.NORECORDSMATCH,
);
}
finder.andWhere('server_date_published', '>=', fromDate.format('YYYY-MM-DD HH:mm:ss'));
}
private handleUntilFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, until: string) {
const untilDate = this.parseDateWithValidation(until, 'Until');
const earliestPublicationDate = dayjs(this.firstPublishedDataset?.server_date_published.toISO());
if (earliestPublicationDate.isAfter(untilDate)) {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'earliestDatestamp is greater than given until date. The given values results in an empty list.',
OaiErrorCodes.NORECORDSMATCH,
);
}
finder.andWhere('server_date_published', '<=', untilDate.format('YYYY-MM-DD HH:mm:ss'));
}
private parseDateWithValidation(dateStr: string, label: string) {
let date = dayjs(dateStr);
if (!date.isValid()) {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
`${label} date parameter is not valid.`,
OaiErrorCodes.BADARGUMENT,
);
}
date = dayjs.tz(dateStr, 'Europe/Vienna');
return date.hour() === 0 ? (label === 'From' ? date.startOf('day') : date.endOf('day')) : date;
}
private setParamResumption(res: string, cursor: number, totalIds: number) {
const tomorrow = dayjs().add(1, 'day').format('YYYY-MM-DDThh:mm:ss[Z]');
this.xsltParameter['dateDelete'] = tomorrow;
this.xsltParameter['res'] = res;
this.xsltParameter['cursor'] = cursor;
this.xsltParameter['totalIds'] = totalIds;
}
private validateAndGetIdentifier(oaiRequest: Dictionary): number {
// Identifier references metadata Urn, not plain Id!
// Currently implemented as 'oai:foo.bar.de:{docId}' or 'urn:nbn...-123'
if (!('identifier' in oaiRequest)) {
throw new BadOaiModelException('The prefix of the identifier argument is unknown.');
}
const dataId = Number(this.getDocumentIdByIdentifier(oaiRequest.identifier));
if (isNaN(dataId)) {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'The value of the identifier argument is illegal in this repository.',
OaiErrorCodes.BADARGUMENT,
);
}
return dataId;
}
private validateAndGetMetadataPrefix(oaiRequest: Dictionary): string {
let metadataPrefix = '';
if ('metadataPrefix' in oaiRequest) {
metadataPrefix = oaiRequest['metadataPrefix'];
} else {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'The prefix of the metadata argument is unknown.',
OaiErrorCodes.BADARGUMENT,
);
}
return metadataPrefix;
}
private validateDatasetState(dataset: Dataset): void {
if (dataset.server_state == null || !this.deliveringDocumentStates.includes(dataset.server_state)) {
throw new OaiModelException(
StatusCodes.INTERNAL_SERVER_ERROR,
'Document is not available for OAI export!',
OaiErrorCodes.NORECORDSMATCH,
);
}
}
private async createXmlRecord(dataset: Dataset, datasetNode: XMLBuilder) {
const domNode = await this.getDatasetXmlDomNode(dataset);
if (domNode) {
// add frontdoor url and data-type
dataset.publish_id && this.addLandingPageAttribute(domNode, dataset.publish_id.toString());
this.addSpecInformation(domNode, 'data-type:' + dataset.type);
if (dataset.collections) {
for (const coll of dataset.collections) {
const collRole = coll.collectionRole;
this.addSpecInformation(domNode, collRole.oai_name + ':' + coll.number);
}
}
datasetNode.import(domNode);
}
}
private async getDatasetXmlDomNode(dataset: Dataset) {
const xmlModel = new XmlModel(dataset);
// xmlModel.setModel(dataset);
xmlModel.excludeEmptyFields();
xmlModel.caching = true;
// const cache = dataset.xmlCache ? dataset.xmlCache : null;
// dataset.load('xmlCache');
if (dataset.xmlCache) {
xmlModel.xmlCache = dataset.xmlCache;
}
// return cache.getDomDocument();
const domDocument: XMLBuilder | null = await xmlModel.getDomDocument();
return domDocument;
}
private addSpecInformation(domNode: XMLBuilder, information: string) {
domNode.ele('SetSpec').att('Value', information);
}
private addLandingPageAttribute(domNode: XMLBuilder, dataid: string) {
const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
const url = 'https://' + getDomain(baseDomain) + '/dataset/' + dataid;
// add attribute du dataset xml element
domNode.att('landingpage', url);
}
private getDocumentIdByIdentifier(oaiIdentifier: string): string {
const identifierParts: string[] = oaiIdentifier.split(':'); // explode(":", $oaiIdentifier);
const dataId: string = identifierParts[2];
// switch (identifierParts[0]) {
// case 'oai':
// if (isset($identifierParts[2])) {
// $dataId = $identifierParts[2];
// }
// break;
// default:
// throw new OaiModelException(
// 'The prefix of the identifier argument is unknown.',
// OaiModelError::BADARGUMENT
// );
// break;
// }
// if (empty($dataId) or !preg_match('/^\d+$/', $dataId)) {
// throw new OaiModelException(
// 'The value of the identifier argument is unknown or illegal in this repository.',
// OaiModelError::IDDOESNOTEXIST
// );
return dataId;
}
private async getSetsForCollections(): Promise<Dictionary> {
const sets: { [key: string]: string } = {} as Dictionary;
const collections = await Collection.query()
.select('name', 'number', 'role_id')
.whereHas('collectionRole', (query) => {
query.where('visible_oai', true);
})
.preload('collectionRole');
collections.forEach((collection) => {
// if collection has a collection role (classification like ddc):
if (collection.number) {
// collection.load('collectionRole');
const setSpec = collection.collectionRole?.oai_name + ':' + collection.number;
sets[setSpec] = `Set ${collection.number} '${collection.name}'`;
}
});
return sets;
}
private async getSetsForDatasetTypes(): Promise<Dictionary> {
const sets: { [key: string]: string } = {} as Dictionary;
const datasets: Array<Dataset> = await Dataset.query().select('type').where('server_state', 'published');
datasets.forEach((dataset) => {
if (dataset.type && false == preg_match(this.sampleRegEx, dataset.type)) {
const msg = `Invalid SetSpec (data-type='${dataset.type}').
Allowed characters are [${this.sampleRegEx}].`;
// Log::error("OAI-PMH: $msg");
logger.error(`OAI-PMH: ${msg}`);
return;
}
const setSpec = 'data-type:' + dataset.type;
sets[setSpec] = `Set for document type '${dataset.type}'`;
});
return sets;
}
private handleIllegalVerb() {
this.xsltParameter['oai_error_code'] = 'badVerb';
this.xsltParameter['oai_error_message'] = 'The verb provided in the request is illegal.';
}
/**
* Helper method to build a browser fingerprint by combining:
* - User-Agent header,
* - the IP address,
* - Accept-Language header,
* - current timestamp rounded to the hour.
*
* Every new hour, this will return a different fingerprint.
*/
private getBrowserFingerprint(request: Request): string {
const userAgent = request.header('user-agent') || 'unknown';
// Check for X-Forwarded-For header to use the client IP from the proxy if available.
const xForwardedFor = request.header('x-forwarded-for');
let ip = request.ip();
// console.log(ip);
if (xForwardedFor) {
// X-Forwarded-For may contain a comma-separated list of IPs; the first one is the client IP.
ip = xForwardedFor.split(',')[0].trim();
// console.log('xforwardedfor ip' + ip);
}
const locale = request.header('accept-language') || 'default';
// Round the current time to the start of the hour.
const timestampHour = dayjs().startOf('hour').format('YYYY-MM-DDTHH');
return `${userAgent}-${ip}-${locale}-${timestampHour}`;
}
}