tethys.backend/app/Controllers/Http/Oai/OaiController.ts
Arno Kaimbacher b5bbe26ec2
Some checks failed
build.yaml / feat: Enhance background job settings UI and functionality (push) Failing after 0s
feat: Enhance background job settings UI and functionality
- Updated BackgroundJob.vue to improve the display of background job statuses, including missing cross-references and current job mode.
- Added auto-refresh functionality for background job status.
- Introduced success toast notifications for successful status refreshes.
- Modified the XML serialization process in DatasetXmlSerializer for better caching and performance.
- Implemented a new RuleProvider for managing custom validation rules.
- Improved error handling in routes for loading background job settings.
- Enhanced ClamScan configuration with socket support for virus scanning.
- Refactored dayjs utility to streamline locale management.
2025-10-14 12:19:09 +02:00

729 lines
30 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type { HttpContext } from '@adonisjs/core/http';
// import { RequestContract } from '@ioc:Adonis/Core/Request';
import { Request } from '@adonisjs/core/http';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import { create } from 'xmlbuilder2';
import dayjs, { Dayjs } from 'dayjs';
import utc from 'dayjs/plugin/utc.js';
import timezone from 'dayjs/plugin/timezone.js';
import { readFileSync } from 'fs';
import { StatusCodes } from 'http-status-codes';
import SaxonJS from 'saxon-js';
// import { Xslt, xmlParse } from 'xslt-processor'
import { OaiErrorCodes, OaiModelError } from '#app/exceptions/OaiErrorCodes';
import { OaiModelException, BadOaiModelException } from '#app/exceptions/OaiModelException';
import Dataset from '#models/dataset';
import Collection from '#models/collection';
import { getDomain, preg_match } from '#app/utils/utility-functions';
import DatasetXmlSerializer from '#app/Library/DatasetXmlSerializer';
import logger from '@adonisjs/core/services/logger';
import ResumptionToken from '#app/Library/Oai/ResumptionToken';
// import Config from '@ioc:Adonis/Core/Config';
import config from '@adonisjs/core/services/config';
// import { inject } from '@adonisjs/fold';
import { inject } from '@adonisjs/core';
// import { TokenWorkerContract } from "MyApp/Models/TokenWorker";
import TokenWorkerContract from '#library/Oai/TokenWorkerContract';
import { ModelQueryBuilderContract } from '@adonisjs/lucid/types/model';
/**
 * Bag of named parameters handed to the XSLT stylesheet.
 * Values are intentionally untyped because strings and numbers are both stored.
 */
interface XslTParameter {
    [parameterName: string]: any;
}
/**
 * Plain string-to-string map; used for the incoming OAI request arguments
 * and for the setSpec -> description tables.
 */
interface Dictionary {
    [key: string]: string;
}
/**
 * Working state for one page of a ListRecords / ListIdentifiers response.
 */
interface PagingParameter {
    /** Value reported as `cursor` for the resumption token of this response. */
    cursor: number;
    /** Start position stored in the resumption token issued for the next page. */
    start: number;
    /** Total number of datasets matching the request. */
    totalLength: number;
    /** publish_ids delivered in the current response. */
    activeWorkIds: number[];
    /** publish_ids reserved for the next response (stored in the resumption token). */
    nextDocIds: number[];
    /** Requested metadata format. */
    metadataPrefix: string;
    /** Original request arguments, stored in the token so follow-up pages can rebuild the query. */
    queryParams: Object;
}
@inject()
export default class OaiController {
// Values of `server_state` a dataset must have to be delivered via OAI-PMH.
private deliveringDocumentStates = ['published', 'deleted'];
// Pattern a dataset type has to match before it is listed as a `data-type:` set.
private sampleRegEx = /^[A-Za-zäüÄÜß0-9\-_.!~]+$/;
// Parameters passed to the XSLT stylesheet; re-initialised at the start of every index() call.
private xsltParameter: XslTParameter;
// Result of Dataset.earliestPublicationDate(), loaded in index(); null when nothing is returned.
private firstPublishedDataset: Dataset | null;
/**
* Intermediate XML document (a `root` element that receives a `Datasets` child)
* which is serialized and fed into the XSLT transformation.
* Re-created at the start of every index() call.
*/
private xml: XMLBuilder;
// Content of the compiled stylesheet file read in the constructor.
private proc;
/**
 * @param tokenWorker injected store used to persist and look up resumption tokens.
 */
constructor(public tokenWorker: TokenWorkerContract) {
    // Load the compiled XSLT stylesheet (SEF JSON). It is later passed to SaxonJS as
    // `stylesheetText`, which is documented as a string, so decode the file as UTF-8
    // instead of keeping the raw Buffer.
    this.proc = readFileSync('public/assets2/datasetxml2oai.sef.json', 'utf8');
    // Register the dayjs plugins used by the date handling in this controller.
    dayjs.extend(utc);
    dayjs.extend(timezone);
}
/**
 * HTTP entry point for OAI-PMH requests.
 *
 * Collects the request arguments (body for POST, query string for GET), lets
 * handleRequest() fill the intermediate XML document and the stylesheet
 * parameters, transforms the document with SaxonJS and sends the result as XML.
 *
 * Errors raised while handling the request are not propagated: they are turned
 * into the `oai_error_code` / `oai_error_message` stylesheet parameters.
 * Only a failing XSLT transformation results in a JSON 500 response.
 */
public async index({ response, request }: HttpContext): Promise<void> {
    this.xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
    const xsltParameter: XslTParameter = (this.xsltParameter = {});

    let oaiRequest: Dictionary = {};
    if (request.method() === 'POST') {
        oaiRequest = request.body();
    } else if (request.method() === 'GET') {
        oaiRequest = request.qs();
    } else {
        xsltParameter['oai_error_code'] = 'unknown';
        xsltParameter['oai_error_message'] = 'Only POST and GET methods are allowed for OAI-PMH.';
    }

    let earliestDateFromDb;
    try {
        this.firstPublishedDataset = await Dataset.earliestPublicationDate();
        if (this.firstPublishedDataset != null) {
            earliestDateFromDb = this.firstPublishedDataset.server_date_published.toFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        }
        this.xsltParameter['earliestDatestamp'] = earliestDateFromDb;
        // start the request
        await this.handleRequest(oaiRequest, request);
    } catch (error) {
        if (error instanceof OaiModelException) {
            const code = error.oaiCode;
            let oaiErrorCode: string | undefined = 'Unknown oai error code ' + code;
            if (OaiModelError.has(code) && OaiModelError.get(code) !== undefined) {
                oaiErrorCode = OaiModelError.get(code);
            }
            this.xsltParameter['oai_error_code'] = oaiErrorCode;
            this.xsltParameter['oai_error_message'] = error.message;
        } else {
            // Unexpected failure: keep the client-facing message generic, but do not lose the cause.
            const reason = error instanceof Error ? error.message : String(error);
            logger.error(`OAI-PMH: unexpected error while handling request: ${reason}`);
            this.xsltParameter['oai_error_code'] = 'unknown';
            this.xsltParameter['oai_error_message'] = 'An internal error occurred.';
        }
    }

    const xmlString = this.xml.end({ prettyPrint: true });
    let xmlOutput;
    try {
        const result = await SaxonJS.transform({
            stylesheetText: this.proc,
            destination: 'serialized',
            sourceText: xmlString,
            stylesheetParams: xsltParameter,
        });
        xmlOutput = result.principalResult;
    } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        logger.error(`OAI-PMH: XSLT transformation failed: ${reason}`);
        return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
            message: 'An error occurred while transforming the OAI-PMH response',
            error: reason,
        });
    }

    response
        .header('Content-Type', 'application/xml; charset=utf-8')
        .header('Access-Control-Allow-Origin', '*')
        .header('Access-Control-Allow-Methods', 'GET,POST');
    response.status(StatusCodes.OK).send(xmlOutput);
}
/**
 * Sets the stylesheet parameters shared by all verbs and dispatches to the
 * handler of the requested OAI-PMH verb.
 *
 * @param oaiRequest request arguments (query string or body).
 * @param request current HTTP request; used for protocol, host name and the browser fingerprint.
 * @throws OaiModelException with code BADVERB when no `verb` argument is present.
 */
protected async handleRequest(oaiRequest: Dictionary, request: Request) {
    // OAI-PMH requires responseDate in UTC. The format string appends a literal 'Z',
    // so the timestamp itself has to be taken in UTC rather than in server-local time.
    const now: Dayjs = dayjs.utc();
    this.xsltParameter['responseDate'] = now.format('YYYY-MM-DDTHH:mm:ss[Z]');
    this.xsltParameter['unixTimestamp'] = now.unix();

    // set OAI base url
    const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
    this.xsltParameter['baseURL'] = baseDomain + '/oai';
    this.xsltParameter['repURL'] = request.protocol() + '://' + request.hostname();
    this.xsltParameter['downloadLink'] = request.protocol() + '://' + request.hostname() + '/file/download/';
    this.xsltParameter['doiLink'] = 'https://doi.org/';
    this.xsltParameter['doiPrefix'] = 'info:eu-repo/semantics/altIdentifier/doi/';

    const verb = oaiRequest['verb'];
    if (!verb) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The verb provided in the request is illegal.',
            OaiErrorCodes.BADVERB,
        );
    }

    this.xsltParameter['oai_verb'] = verb;
    switch (verb) {
        case 'Identify':
            this.handleIdentify();
            break;
        case 'ListMetadataFormats':
            this.handleListMetadataFormats();
            break;
        case 'GetRecord':
            await this.handleGetRecord(oaiRequest);
            break;
        case 'ListRecords':
            // The fingerprint is handed to the token worker when a resumption token is stored.
            await this.handleListRecords(oaiRequest, this.getBrowserFingerprint(request));
            break;
        case 'ListIdentifiers':
            await this.handleListIdentifiers(oaiRequest, this.getBrowserFingerprint(request));
            break;
        case 'ListSets':
            await this.handleListSets();
            break;
        default:
            this.handleIllegalVerb();
    }
}
/**
 * Handles the `Identify` verb: publishes the repository description as
 * stylesheet parameters and adds an empty `Datasets` element to the document.
 * Values come from the environment, with built-in defaults.
 */
protected handleIdentify() {
    const repIdentifier = process.env.OAI_REP_IDENTIFIER ?? 'tethys.at';

    // set parameters for oai-pmh.xslt
    Object.assign(this.xsltParameter, {
        email: process.env.OAI_EMAIL ?? 'repository@geosphere.at',
        repositoryName: process.env.OAI_REPOSITORY_NAME ?? 'Tethys RDR',
        repIdentifier: repIdentifier,
        sampleIdentifier: `oai:${repIdentifier}:1`,
    });

    const root = this.xml.root();
    root.ele('Datasets');
}
/**
 * Handles the `ListMetadataFormats` verb. Only an empty `Datasets` element is
 * added to the document; no further parameters are set here.
 */
protected handleListMetadataFormats() {
    const root = this.xml.root();
    root.ele('Datasets');
}
/**
 * Handles the `ListSets` verb: writes one `Rdr_Sets` element per set
 * (fixed sets, dataset types, collections) below `Datasets`.
 */
protected async handleListSets() {
    this.xsltParameter['repIdentifier'] = 'tethys.at';
    const datasetsNode = this.xml.root().ele('Datasets');

    // Dynamic sets are loaded one after the other, dataset types first.
    const datasetTypeSets = await this.getSetsForDatasetTypes();
    const collectionSets = await this.getSetsForCollections();

    const allSets: Dictionary = {
        'open_access': 'Set for open access licenses',
        'openaire_data': 'OpenAIRE',
        'doc-type:ResearchData': 'Set for document type ResearchData',
        ...datasetTypeSets,
        ...collectionSets,
    };

    Object.entries(allSets).forEach(([setSpec, setName]) => {
        const setNode = datasetsNode.ele('Rdr_Sets');
        setNode.att('Type', setSpec);
        setNode.att('TypeName', setName);
    });
}
/**
 * Handles the `GetRecord` verb: looks up a single dataset by the id encoded in
 * the `identifier` argument and appends its XML record to `Datasets`.
 *
 * @param oaiRequest request arguments; `identifier` and `metadataPrefix` are required.
 * @throws BadOaiModelException when `identifier` is missing.
 * @throws OaiModelException when the identifier is illegal or unknown, the
 *         metadata prefix is missing, or the dataset is not in a deliverable state.
 */
protected async handleGetRecord(oaiRequest: Dictionary) {
    this.xsltParameter['repIdentifier'] = 'tethys.at';

    // Fail early when the required argument is absent.
    const hasIdentifier = 'identifier' in oaiRequest;
    if (!hasIdentifier) {
        throw new BadOaiModelException('The prefix of the identifier argument is unknown.');
    }

    const publishId = this.validateAndGetIdentifier(oaiRequest);

    // Load the dataset together with its XML cache and the roles of its collections.
    const record = await Dataset.query()
        .where('publish_id', publishId)
        .preload('xmlCache')
        .preload('collections', (collectionQuery) => {
            collectionQuery.preload('collectionRole');
        })
        .first();

    const found = record && record.publish_id;
    if (!found) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The value of the identifier argument is unknown or illegal in this repository.',
            OaiErrorCodes.IDDOESNOTEXIST,
        );
    }

    this.xsltParameter['oai_metadataPrefix'] = this.validateAndGetMetadataPrefix(oaiRequest);

    // Only datasets in a deliverable state may be exported.
    this.validateDatasetState(record);

    const datasetsNode = this.xml.root().ele('Datasets');
    await this.createXmlRecord(record, datasetsNode);
}
/**
 * Handles the `ListIdentifiers` verb. Connects the token worker if necessary
 * and delegates to handleLists() with the page size configured under
 * `oai.max.listidentifiers` (default 100).
 */
protected async handleListIdentifiers(oaiRequest: Dictionary, browserFingerprint: string) {
    const pageSize: number = config.get('oai.max.listidentifiers', 100);

    const worker = this.tokenWorker;
    if (!worker.isConnected) {
        await worker.connect();
    }

    await this.handleLists(oaiRequest, pageSize, browserFingerprint);
}
/**
 * Handles the `ListRecords` verb. Connects the token worker if necessary
 * and delegates to handleLists() with the page size configured under
 * `oai.max.listrecords` (default 100).
 */
protected async handleListRecords(oaiRequest: Dictionary, browserFingerprint: string) {
    const pageSize: number = config.get('oai.max.listrecords', 100);

    const worker = this.tokenWorker;
    if (!worker.isConnected) {
        await worker.connect();
    }

    await this.handleLists(oaiRequest, pageSize, browserFingerprint);
}
/**
 * Shared implementation of ListRecords / ListIdentifiers.
 *
 * Determines the ids of the current and the next page (from a resumption token
 * or from the request arguments), appends the XML records of the current page
 * to `Datasets` and stores a resumption token when more ids are pending.
 *
 * @throws OaiModelException with code NORECORDSMATCH when the current page is empty.
 */
private async handleLists(oaiRequest: Dictionary, maxRecords: number, browserFingerprint: string) {
    this.xsltParameter['repIdentifier'] = 'tethys.at';
    const datasetsNode = this.xml.root().ele('Datasets');

    const paging: PagingParameter = {
        cursor: 0,
        totalLength: 0,
        start: maxRecords + 1,
        nextDocIds: [],
        activeWorkIds: [],
        metadataPrefix: '',
        queryParams: {},
    };

    const isResumption = 'resumptionToken' in oaiRequest;
    if (isResumption) {
        await this.handleResumptionToken(oaiRequest, maxRecords, paging);
    } else {
        await this.handleNoResumptionToken(oaiRequest, paging, maxRecords);
    }

    const pendingIds: number[] = paging.nextDocIds;
    const currentIds: number[] = paging.activeWorkIds;
    if (currentIds.length === 0) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The combination of the given values results in an empty list.',
            OaiErrorCodes.NORECORDSMATCH,
        );
    }

    const pageDatasets = await Dataset.query()
        .whereIn('publish_id', currentIds)
        .preload('xmlCache')
        .preload('collections', (collectionQuery) => {
            collectionQuery.preload('collectionRole');
        })
        .orderBy('publish_id');

    // Records are serialized one after the other to keep document order stable.
    for (const record of pageDatasets) {
        await this.createXmlRecord(record, datasetsNode);
    }

    await this.setResumptionToken(pendingIds, paging, browserFingerprint);
}
/**
 * First page of a list request (no resumption token): validates the metadata
 * prefix, builds the dataset query from the request arguments and fills
 * `paginationParams` with the ids of the first and second page.
 */
private async handleNoResumptionToken(oaiRequest: Dictionary, paginationParams: PagingParameter, maxRecords: number) {
    this.validateMetadataPrefix(oaiRequest, paginationParams);

    const datasetQuery: ModelQueryBuilderContract<typeof Dataset, Dataset> = Dataset.query();
    datasetQuery.whereIn('server_state', this.deliveringDocumentStates);

    this.applySetFilter(datasetQuery, oaiRequest);
    this.applyDateFilters(datasetQuery, oaiRequest);

    await this.fetchAndSetResults(datasetQuery, paginationParams, oaiRequest, maxRecords);
}
/**
 * Runs the prepared dataset query and fills `paginationParams` for the first page.
 *
 * Two pages worth of ids are fetched at once: the first `maxRecords` ids are
 * delivered now, the remainder is kept for the resumption token.
 *
 * @param finder query with all set/date filters already applied.
 * @param paginationParams receives totalLength, activeWorkIds, nextDocIds and queryParams.
 * @param oaiRequest original request arguments; stored so follow-up pages can rebuild the query.
 * @param maxRecords configured page size.
 */
private async fetchAndSetResults(
    finder: ModelQueryBuilderContract<typeof Dataset, Dataset>,
    paginationParams: PagingParameter,
    oaiRequest: Dictionary,
    maxRecords: number,
) {
    // Count on a clone so the select/order/limit below are applied to the untouched query.
    const countRow = await finder.clone().count('* as total').first();
    paginationParams.totalLength = Number(countRow?.$extras.total ?? 0);

    const combinedRecords: Dataset[] = await finder
        .select('publish_id')
        .orderBy('publish_id')
        .offset(0)
        .limit(maxRecords * 2);
    const publishIds = combinedRecords.map((dat) => Number(dat.publish_id));

    // Split at the configured page size. A fixed split at 100 would deliver the wrong
    // number of records whenever the configured maximum differs from 100.
    paginationParams.activeWorkIds = publishIds.slice(0, maxRecords);
    paginationParams.nextDocIds = publishIds.slice(maxRecords);

    // No resumption token was used: take queryParams from the current oaiRequest.
    paginationParams.queryParams = {
        ...oaiRequest,
        deliveringStates: this.deliveringDocumentStates,
    };
}
/**
 * Follow-up page of a list request: restores the paging state from the stored
 * resumption token and looks up the ids of the page after the current one.
 *
 * @throws OaiModelException with code BADRESUMPTIONTOKEN when the token is not found.
 */
private async handleResumptionToken(oaiRequest: Dictionary, maxRecords: number, paginationParams: PagingParameter) {
    const tokenKey = oaiRequest['resumptionToken'];
    const storedToken = await this.tokenWorker.get(tokenKey);
    if (!storedToken) {
        throw new OaiModelException(StatusCodes.INTERNAL_SERVER_ERROR, 'cache is outdated.', OaiErrorCodes.BADRESUMPTIONTOKEN);
    }

    const { startPosition, totalIds, documentIds, metadataPrefix, queryParams } = storedToken;

    // Positions in the token are 1-based, the cursor is 0-based.
    paginationParams.cursor = startPosition - 1;
    paginationParams.start = startPosition + maxRecords;
    paginationParams.totalLength = totalIds;
    paginationParams.activeWorkIds = documentIds;
    paginationParams.metadataPrefix = metadataPrefix;
    paginationParams.queryParams = queryParams;
    this.xsltParameter['oai_metadataPrefix'] = metadataPrefix;

    const followUpQuery = this.buildDatasetQueryViaToken(storedToken);
    const upcoming: Dataset[] = await this.fetchNextRecords(followUpQuery, storedToken, maxRecords);
    paginationParams.nextDocIds = upcoming.map((dat) => Number(dat.publish_id));
}
/**
 * Stores a resumption token for the pending ids and exposes it to the
 * stylesheet. Does nothing when no ids are left.
 */
private async setResumptionToken(nextIds: number[], paginationParams: PagingParameter, browserFingerprint: string) {
    if (!(nextIds.length > 0)) {
        return;
    }

    const nextToken = new ResumptionToken();
    nextToken.startPosition = paginationParams.start;
    nextToken.totalIds = paginationParams.totalLength;
    nextToken.documentIds = nextIds;
    nextToken.metadataPrefix = paginationParams.metadataPrefix;
    nextToken.queryParams = paginationParams.queryParams;

    const tokenKey: string = await this.tokenWorker.set(nextToken, browserFingerprint);
    this.setParamResumption(tokenKey, paginationParams.cursor, paginationParams.totalLength);
}
/**
 * Rebuilds the dataset query of the original list request from the
 * arguments stored in the resumption token.
 */
private buildDatasetQueryViaToken(token: ResumptionToken) {
    const storedParams = token.queryParams || {};
    const states = storedParams.deliveringStates || this.deliveringDocumentStates;

    const datasetQuery = Dataset.query();
    datasetQuery.whereIn('server_state', states);
    this.applySetFilter(datasetQuery, storedParams);
    this.applyDateFilters(datasetQuery, storedParams);

    return datasetQuery;
}
/**
 * Fetches the publish_ids of the page that follows the one currently delivered
 * from a resumption token.
 *
 * @param finder query rebuilt from the token's stored arguments.
 * @param token resumption token of the current page; `startPosition` is 1-based.
 * @param maxRecords configured page size; used both to skip the current page and as limit.
 */
private async fetchNextRecords(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, token: ResumptionToken, maxRecords: number) {
    return finder
        .select('publish_id')
        .orderBy('publish_id')
        .offset(token.startPosition - 1 + maxRecords)
        // Limit by the configured page size; a fixed 100 over- or under-fetches
        // whenever the configured maximum differs from 100.
        .limit(maxRecords);
}
/**
 * Requires the `metadataPrefix` argument and copies it into the paging state
 * and the stylesheet parameters.
 *
 * @throws OaiModelException with code BADARGUMENT when the argument is missing.
 */
private validateMetadataPrefix(oaiRequest: Dictionary, paginationParams: PagingParameter) {
    const prefixGiven = 'metadataPrefix' in oaiRequest;
    if (!prefixGiven) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The prefix of the metadata argument is unknown.',
            OaiErrorCodes.BADARGUMENT,
        );
    }

    const prefix = oaiRequest['metadataPrefix'];
    paginationParams.metadataPrefix = prefix;
    this.xsltParameter['oai_metadataPrefix'] = prefix;
}
/**
 * Narrows the query according to the `set` argument (`<type>:<value>`).
 * Supported types are `data-type`, `open_access` and `ddc`; any other
 * set type leaves the query untouched.
 */
private applySetFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, queryParams: any) {
    if (!('set' in queryParams)) {
        return;
    }

    const [setType, setValue] = queryParams['set'].split(':');

    if (setType === 'data-type') {
        if (setValue) {
            finder.where('type', setValue);
        }
    } else if (setType === 'open_access') {
        finder.andWhereHas('licenses', (licenseQuery) => {
            licenseQuery.whereIn('name', ['CC-BY-4.0', 'CC-BY-SA-4.0']);
        });
    } else if (setType === 'ddc') {
        if (setValue) {
            finder.andWhereHas('collections', (collectionQuery) => {
                collectionQuery.where('number', setValue);
            });
        }
    }
}
/**
 * Applies the `from` / `until` arguments to the query, choosing the handler
 * that matches which of the two are present.
 */
private applyDateFilters(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, queryParams: any) {
    const from = queryParams.from;
    const until = queryParams.until;

    if (from && until) {
        this.handleFromUntilFilter(finder, from, until);
        return;
    }
    if (from) {
        this.handleFromFilter(finder, from);
        return;
    }
    if (until) {
        this.handleUntilFilter(finder, until);
    }
}
/**
 * Restricts the query to datasets published between `from` and `until`.
 *
 * @throws OaiModelException with code BADARGUMENT when a date is invalid or
 *         the two arguments differ in length (i.e. granularity).
 */
private handleFromUntilFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, from: string, until: string) {
    // Both dates are parsed (and validated) before the granularity check.
    const lowerBound = this.parseDateWithValidation(from, 'From');
    const upperBound = this.parseDateWithValidation(until, 'Until');

    const sameGranularity = from.length === until.length;
    if (!sameGranularity) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The request has different granularities for the from and until parameters.',
            OaiErrorCodes.BADARGUMENT,
        );
    }

    const sqlFormat = 'YYYY-MM-DD HH:mm:ss';
    finder.whereBetween('server_date_published', [lowerBound.format(sqlFormat), upperBound.format(sqlFormat)]);
}
/**
 * Restricts the query to datasets published on or after `from`.
 *
 * @throws OaiModelException with code BADARGUMENT for an invalid date and
 *         NORECORDSMATCH when `from` lies in the future.
 */
private handleFromFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, from: string) {
    const lowerBound = this.parseDateWithValidation(from, 'From');

    const liesInFuture = lowerBound.isAfter(dayjs());
    if (liesInFuture) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'Given from date is greater than now. The given values results in an empty list.',
            OaiErrorCodes.NORECORDSMATCH,
        );
    }

    const lowerBoundSql = lowerBound.format('YYYY-MM-DD HH:mm:ss');
    finder.andWhere('server_date_published', '>=', lowerBoundSql);
}
/**
 * Restricts the query to datasets published on or before `until`.
 *
 * @throws OaiModelException with code BADARGUMENT for an invalid date and
 *         NORECORDSMATCH when `until` lies before the earliest publication date.
 */
private handleUntilFilter(finder: ModelQueryBuilderContract<typeof Dataset, Dataset>, until: string) {
    const upperBound = this.parseDateWithValidation(until, 'Until');

    const earliestIso = this.firstPublishedDataset?.server_date_published.toISO();
    const earliestPublication = dayjs(earliestIso);
    if (earliestPublication.isAfter(upperBound)) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'earliestDatestamp is greater than given until date. The given values results in an empty list.',
            OaiErrorCodes.NORECORDSMATCH,
        );
    }

    const upperBoundSql = upperBound.format('YYYY-MM-DD HH:mm:ss');
    finder.andWhere('server_date_published', '<=', upperBoundSql);
}
/**
 * Parses a `from` / `until` argument in the 'Europe/Vienna' time zone.
 *
 * An argument without a time component (day granularity) is widened to the
 * start of the day for 'From' and to the end of the day for any other label.
 * An argument that carries an explicit time is returned unchanged.
 *
 * @param dateStr raw argument value.
 * @param label 'From' or 'Until'; also used in the error message.
 * @returns the parsed dayjs instance.
 * @throws OaiModelException with code BADARGUMENT when the value is not a valid date.
 */
private parseDateWithValidation(dateStr: string, label: string) {
    if (!dayjs(dateStr).isValid()) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `${label} date parameter is not valid.`,
            OaiErrorCodes.BADARGUMENT,
        );
    }

    const date = dayjs.tz(dateStr, 'Europe/Vienna');

    // Decide on the granularity from the argument itself, not from the parsed hour:
    // a timestamp such as '...T00:30:00Z' also has hour 0 but must not be widened
    // to the whole day.
    const hasTimeComponent = /\d{1,2}:\d{2}/.test(dateStr);
    if (hasTimeComponent) {
        return date;
    }
    return label === 'From' ? date.startOf('day') : date.endOf('day');
}
/**
 * Exposes the resumption token data to the stylesheet.
 *
 * @param res key of the stored resumption token.
 * @param cursor 0-based position of the first record of the current page.
 * @param totalIds total number of matching datasets.
 */
private setParamResumption(res: string, cursor: number, totalIds: number) {
    // `dateDelete` is set to one day from now. Use the 24-hour token `HH`
    // (`hh` is the 12-hour clock and makes afternoon times ambiguous), and take the
    // time in UTC because the format string appends a literal 'Z'.
    const expirationDate = dayjs.utc().add(1, 'day').format('YYYY-MM-DDTHH:mm:ss[Z]');
    this.xsltParameter['dateDelete'] = expirationDate;
    this.xsltParameter['res'] = res;
    this.xsltParameter['cursor'] = cursor;
    this.xsltParameter['totalIds'] = totalIds;
}
/**
 * Extracts the numeric dataset id from the `identifier` argument.
 * The id is taken from the third colon-separated part of the identifier.
 *
 * @throws BadOaiModelException when `identifier` is missing.
 * @throws OaiModelException with code BADARGUMENT when the id part is not numeric.
 */
private validateAndGetIdentifier(oaiRequest: Dictionary): number {
    const identifierGiven = 'identifier' in oaiRequest;
    if (!identifierGiven) {
        throw new BadOaiModelException('The prefix of the identifier argument is unknown.');
    }

    const rawId = this.getDocumentIdByIdentifier(oaiRequest.identifier);
    const numericId = Number(rawId);
    if (Number.isNaN(numericId)) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The value of the identifier argument is illegal in this repository.',
            OaiErrorCodes.BADARGUMENT,
        );
    }

    return numericId;
}
/**
 * Returns the `metadataPrefix` argument of the request.
 *
 * @throws OaiModelException with code BADARGUMENT when the argument is missing.
 */
private validateAndGetMetadataPrefix(oaiRequest: Dictionary): string {
    if (!('metadataPrefix' in oaiRequest)) {
        throw new OaiModelException(
            StatusCodes.INTERNAL_SERVER_ERROR,
            'The prefix of the metadata argument is unknown.',
            OaiErrorCodes.BADARGUMENT,
        );
    }

    return oaiRequest['metadataPrefix'];
}
/**
 * Ensures the dataset's `server_state` is one of the deliverable states.
 *
 * @throws OaiModelException with code NORECORDSMATCH otherwise.
 */
private validateDatasetState(dataset: Dataset): void {
    const state = dataset.server_state;
    const deliverable = state != null && this.deliveringDocumentStates.includes(state);
    if (deliverable) {
        return;
    }

    throw new OaiModelException(
        StatusCodes.INTERNAL_SERVER_ERROR,
        'Document is not available for OAI export!',
        OaiErrorCodes.NORECORDSMATCH,
    );
}
/**
 * Serializes one dataset and imports it into the given `Datasets` node.
 *
 * Before the import, the record gets a landing page attribute (when a
 * publish_id exists) and one `SetSpec` element for its data type and for each
 * of its collections. Nothing is added when no XML could be produced.
 *
 * @param dataset dataset with `collections` (and their `collectionRole`) loaded.
 * @param datasetNode node that receives the record.
 */
private async createXmlRecord(dataset: Dataset, datasetNode: XMLBuilder) {
    const domNode = await this.getDatasetXmlDomNode(dataset);
    if (!domNode) {
        return;
    }

    // add frontdoor url and data-type
    if (dataset.publish_id) {
        this.addLandingPageAttribute(domNode, dataset.publish_id.toString());
    }
    this.addSpecInformation(domNode, 'data-type:' + dataset.type);

    for (const coll of dataset.collections ?? []) {
        const collRole = coll.collectionRole;
        if (!collRole) {
            // A collection without a loaded role cannot be expressed as a setSpec;
            // skip it instead of failing the whole response with a TypeError.
            logger.warn(`OAI-PMH: collection '${coll.number}' of dataset ${dataset.publish_id} has no collection role; setSpec skipped.`);
            continue;
        }
        this.addSpecInformation(domNode, collRole.oai_name + ':' + coll.number);
    }

    datasetNode.import(domNode);
}
/**
 * Produces the XML document of a dataset via DatasetXmlSerializer, with
 * caching enabled and empty fields excluded. A preloaded `xmlCache` is
 * handed to the serializer when present.
 *
 * @returns the XML document, or null when the serializer yields none.
 */
private async getDatasetXmlDomNode(dataset: Dataset) {
    const xmlSerializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();

    const cachedXml = dataset.xmlCache;
    if (cachedXml) {
        xmlSerializer.setCache(cachedXml);
    }

    const document: XMLBuilder | null = await xmlSerializer.toXmlDocument();
    return document;
}
/**
 * Appends a `SetSpec` child with the given value to the record node.
 */
private addSpecInformation(domNode: XMLBuilder, information: string) {
    const setSpecNode = domNode.ele('SetSpec');
    setSpecNode.att('Value', information);
}
/**
 * Sets the `landingpage` attribute of the record node to the dataset page,
 * built from the domain of OAI_BASE_DOMAIN (default 'localhost').
 */
private addLandingPageAttribute(domNode: XMLBuilder, dataid: string) {
    const configuredDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
    const landingPage = `https://${getDomain(configuredDomain)}/dataset/${dataid}`;
    domNode.att('landingpage', landingPage);
}
/**
 * Returns the third colon-separated part of an OAI identifier
 * (e.g. the `{docId}` of `oai:foo.bar.de:{docId}`).
 * No validation happens here; callers check the result.
 */
private getDocumentIdByIdentifier(oaiIdentifier: string): string {
    const [, , documentId] = oaiIdentifier.split(':');
    return documentId;
}
/**
 * Builds the setSpec -> description table for all collections whose role is
 * flagged `visible_oai`. Collections without a number are left out.
 */
private async getSetsForCollections(): Promise<Dictionary> {
    const visibleCollections = await Collection.query()
        .select('name', 'number', 'role_id')
        .whereHas('collectionRole', (roleQuery) => {
            roleQuery.where('visible_oai', true);
        })
        .preload('collectionRole');

    const collectionSets: Dictionary = {};
    for (const entry of visibleCollections) {
        if (!entry.number) {
            continue;
        }
        // setSpec is "<role oai_name>:<collection number>"
        const spec = `${entry.collectionRole?.oai_name}:${entry.number}`;
        collectionSets[spec] = `Set ${entry.number} '${entry.name}'`;
    }
    return collectionSets;
}
/**
 * Builds the setSpec -> description table for the types of all published
 * datasets. Types that do not match `sampleRegEx` are logged and skipped.
 */
private async getSetsForDatasetTypes(): Promise<Dictionary> {
    const published: Array<Dataset> = await Dataset.query().select('type').where('server_state', 'published');

    const typeSets: Dictionary = {};
    for (const entry of published) {
        const isInvalidType = entry.type && preg_match(this.sampleRegEx, entry.type) == false;
        if (isInvalidType) {
            const msg = `Invalid SetSpec (data-type='${entry.type}').\nAllowed characters are [${this.sampleRegEx}].`;
            logger.error(`OAI-PMH: ${msg}`);
            continue;
        }
        typeSets['data-type:' + entry.type] = `Set for document type '${entry.type}'`;
    }
    return typeSets;
}
/**
 * Reports an unsupported verb to the stylesheet as a `badVerb` error.
 */
private handleIllegalVerb() {
    Object.assign(this.xsltParameter, {
        oai_error_code: 'badVerb',
        oai_error_message: 'The verb provided in the request is illegal.',
    });
}
/**
 * Builds a fingerprint string for the requesting client from
 * - the User-Agent header ('unknown' when absent),
 * - the client IP (first entry of X-Forwarded-For when that header is set,
 *   otherwise the IP reported by the request),
 * - the Accept-Language header ('default' when absent),
 * - the current time truncated to the hour.
 *
 * Because of the hour component the fingerprint changes every hour.
 */
private getBrowserFingerprint(request: Request): string {
    const agent = request.header('user-agent') || 'unknown';
    const language = request.header('accept-language') || 'default';

    // Behind a proxy the header may hold a comma-separated chain; its first entry is the client.
    const forwardedFor = request.header('x-forwarded-for');
    const directIp = request.ip();
    const clientIp = forwardedFor ? forwardedFor.split(',')[0].trim() : directIp;

    const hourStamp = dayjs().startOf('hour').format('YYYY-MM-DDTHH');

    return [agent, clientIp, language, hourStamp].join('-');
}
}