import { Controller, Get } from "@overnightjs/core";
import Sequelize from "sequelize";
import { NextFunction, Request, Response } from "express";
import { StatusCodes } from "http-status-codes";
import { create } from "xmlbuilder2";
import { XMLBuilder } from "xmlbuilder2/lib/interfaces";
import { readFileSync } from "fs";
// @ts-ignore
import { transform } from "saxon-js";
import dayjs, { Dayjs, OpUnitType } from "dayjs";
import { Dataset, Project, License } from "../models/init-models";
import Logger from "jet-logger";
import { BadOaiModelException, OaiModelException } from "../exceptions/OaiModelException";
import PageNotFoundException from "../exceptions/PageNotFoundException";
import { OaiErrorCodes } from "../exceptions/OaiErrorCodes";
import XmlModel from "../library/XmlModel";
import Configuration from "../library/oai/OaiConfiguration";
import ResumptionToken from "../library/oai/ResumptionToken";
import TokenWorker from "../library/oai/TokenWorker";
/**
 * Bag of named values handed to the XSLT transformation as `stylesheetParams`.
 * Keys are parameter names (for example `responseDate`, `oai_verb`); values are
 * left untyped because strings, numbers and `undefined` are all stored here.
 */
interface XslTParameter {
[key: string]: any;
}
/**
 * Query arguments of an incoming OAI request, as taken from `request.query`.
 * Values are untyped: the controller reads them by name (`verb`, `identifier`,
 * `metadataPrefix`, `set`, `from`, `until`, `resumptionToken`).
 */
interface OaiParameter {
[key: string]: any;
}
/**
 * Plain string-to-string map; used in this file for the OAI set list
 * (set spec -> human readable set name).
 */
interface IDictionary {
[index: string]: string;
}
/**
 * Reports whether `str` matches `regex`.
 *
 * Thin wrapper around `RegExp.prototype.test`, named after the PHP function it stands in for.
 *
 * @param regex pattern to apply
 * @param str   text to check
 * @returns `true` when the pattern matches, `false` otherwise
 */
function preg_match(regex: RegExp, str: string): boolean {
    return regex.test(str);
}
/**
 * Controller registered under the `oai` route that answers OAI-PMH requests.
 *
 * Every request builds an intermediate XML document (`<Datasets>` with one child per
 * record or set) plus a set of stylesheet parameters, and then runs the compiled
 * stylesheet `datasetxml2oai.sef.json` over it to produce the response body.
 *
 * NOTE(review): the intermediate document and the stylesheet parameters are kept in
 * instance fields that `index` re-initialises for each request. If one controller
 * instance serves overlapping requests, they can overwrite each other's state across
 * `await` points — confirm how the controller is registered.
 */
@Controller("oai")
export class OaiController {
    /** Repository identifier announced to the stylesheet (`repIdentifier`). */
    private static readonly REP_IDENTIFIER = "tethys.at";
    /** dayjs pattern for datestamps in the response; `HH` is the 24-hour clock (`hh` would wrap at noon). */
    private static readonly OAI_DATE_FORMAT = "YYYY-MM-DDTHH:mm:ss[Z]";
    /** dayjs pattern used for `server_date_published` comparisons in queries. */
    private static readonly DB_DATE_FORMAT = "YYYY-MM-DD HH:mm:ss";
    /** Page size used when the configuration does not provide one. */
    private static readonly DEFAULT_PAGE_SIZE = 100;

    /** Only datasets in one of these server states are delivered. */
    private readonly deliveringDocumentStates = ["published", "deleted"];
    /** Characters allowed in the variable part of a set spec. */
    private readonly sampleRegEx = /^[A-Za-zäüÄÜß0-9\-_.!~]+$/;
    /** Stylesheet parameters of the request currently being handled. */
    private xsltParameter: XslTParameter;
    private readonly configuration: Configuration;
    /** Store for resumption tokens; created lazily by the list verbs. */
    private tokenWorker: TokenWorker;
    /**
     * Holds xml representation of document information to be processed.
     */
    private xml: XMLBuilder;
    /** Content of the compiled stylesheet, read once at construction time. */
    private proc;

    constructor() {
        this.proc = readFileSync(__dirname + "/datasetxml2oai.sef.json");
        this.configuration = new Configuration();
    }

    /**
     * Entry point for all OAI requests.
     *
     * Errors raised while handling the verb are not forwarded to Express; they are
     * reported inside the OAI response through the `oai_error_code` /
     * `oai_error_message` stylesheet parameters. Only failures of the initial
     * database lookup or of the transformation itself are passed to `next`.
     *
     * @param request  incoming request; its query string carries the OAI arguments
     * @param response receives the transformed XML with status 200
     * @param next     Express error hand-off
     */
    @Get("")
    public async index(request: Request, response: Response, next: NextFunction) {
        // NOTE(review): the document is seeded from an empty string and every handler
        // calls `root()` on it — confirm xmlbuilder2 yields a usable root for this input.
        this.xml = create({ version: "1.0", encoding: "UTF-8", standalone: true }, "");

        // Keep a local reference so the object given to the transformation is the one filled below.
        const xsltParameter: XslTParameter = {};
        this.xsltParameter = xsltParameter;

        let firstPublishedDataset: Dataset | null;
        try {
            firstPublishedDataset = await Dataset.earliestPublicationDate();
        } catch (error) {
            // Without this the rejected promise would escape the handler unhandled.
            return next(error);
        }
        // Stays undefined when nothing has been published yet.
        xsltParameter["earliestDatestamp"] =
            firstPublishedDataset != null
                ? dayjs(firstPublishedDataset.server_date_published).format(OaiController.OAI_DATE_FORMAT)
                : undefined;

        const oaiRequest: OaiParameter = request.query;
        try {
            await this.handleRequest(oaiRequest, request);
        } catch (error) {
            if (error instanceof OaiModelException) {
                xsltParameter["oai_error_code"] = error.oaiCode;
                xsltParameter["oai_error_message"] = error.message;
            } else {
                // The client only sees a generic message, so keep the details in the log.
                const detail = error instanceof Error ? error.stack ?? error.message : String(error);
                Logger.err(`OAI: unexpected error while handling request: ${detail}`);
                xsltParameter["oai_error_code"] = "unknown";
                xsltParameter["oai_error_message"] = "An internal error occured.";
            }
        }

        const xmlString = this.xml.end({ prettyPrint: true });

        let xmlOutput;
        try {
            const result = await transform({
                stylesheetText: this.proc,
                destination: "serialized",
                sourceText: xmlString,
                stylesheetParams: xsltParameter,
            });
            xmlOutput = result.principalResult;
        } catch (error) {
            return next(error);
        }

        response
            .header("Content-Type", "application/xml")
            .header("Access-Control-Allow-Origin", "*")
            .header("Access-Control-Allow-Methods", "GET,POST");
        response.status(StatusCodes.OK).send(xmlOutput);
    }

    /**
     * Sets the request-independent stylesheet parameters and dispatches on the `verb` argument.
     *
     * @param oaiRequest query arguments of the request
     * @param request    the Express request, used for protocol and host
     * @throws PageNotFoundException when no `verb` argument is given
     * @throws OaiModelException for invalid arguments of the individual verbs
     */
    protected async handleRequest(oaiRequest: OaiParameter, request: Request) {
        const now = dayjs();
        // NOTE(review): dayjs() formats local time while the pattern appends a literal "Z" —
        // confirm the server clock runs in UTC.
        this.xsltParameter["responseDate"] = now.format(OaiController.OAI_DATE_FORMAT);
        this.xsltParameter["unixTimestamp"] = now.unix();

        const baseDomain = process.env.BASE_DOMAIN || "localhost";
        const origin = request.protocol + "://" + request.get("host");
        this.xsltParameter["baseURL"] = baseDomain + "/oai";
        this.xsltParameter["repURL"] = origin;
        this.xsltParameter["downloadLink"] = origin + "/file/download/";
        this.xsltParameter["doiLink"] = "https://doi.org/";
        this.xsltParameter["doiPrefix"] = "info:eu-repo/semantics/altIdentifier/doi/";

        const verb = oaiRequest["verb"];
        if (!verb) {
            throw new PageNotFoundException("verb not found");
        }
        this.xsltParameter["oai_verb"] = verb;

        switch (verb) {
            case "Identify":
                this.handleIdentify();
                break;
            case "ListMetadataFormats":
                this.handleListMetadataFormats();
                break;
            case "GetRecord":
                await this.handleGetRecord(oaiRequest);
                break;
            case "ListRecords":
                await this.handleListRecords(oaiRequest);
                break;
            case "ListIdentifiers":
                await this.handleListIdentifiers(oaiRequest);
                break;
            case "ListSets":
                await this.handleListSets();
                break;
            default:
                this.handleIllegalVerb();
        }
    }

    /**
     * Implements response for OAI-PMH verb 'Identify': publishes the repository
     * description as stylesheet parameters and adds an empty `Datasets` element.
     */
    protected handleIdentify() {
        const repIdentifier = OaiController.REP_IDENTIFIER;
        this.xsltParameter["email"] = "repository@geologie.ac.at";
        this.xsltParameter["repositoryName"] = "Tethys RDR";
        this.xsltParameter["repIdentifier"] = repIdentifier;
        this.xsltParameter["sampleIdentifier"] = "oai:" + repIdentifier + ":1";
        this.xml.root().ele("Datasets");
    }

    /**
     * Implements response for OAI-PMH verb 'ListMetadataFormats'.
     * Only an empty `Datasets` element is added here.
     */
    protected handleListMetadataFormats() {
        this.xml.root().ele("Datasets");
    }

    /**
     * Implements response for OAI-PMH verb 'ListSets': one `Rdr_Sets` element per set,
     * consisting of the fixed `open_access` set and one set per dataset type.
     */
    protected async handleListSets() {
        this.xsltParameter["repIdentifier"] = OaiController.REP_IDENTIFIER;
        const datasetElement = this.xml.root().ele("Datasets");

        const sets: IDictionary = {
            open_access: "Set for open access licenses",
            ...(await this.getSetsForDatasetTypes()),
        };
        for (const [key, value] of Object.entries(sets)) {
            const setElement = datasetElement.ele("Rdr_Sets");
            setElement.att("Type", key);
            setElement.att("TypeName", value);
        }
    }

    /**
     * Implements response for OAI-PMH verb 'GetRecord',
     * e.g. `verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:tethys.at:1`.
     *
     * @param oaiRequest query arguments; needs `identifier` and `metadataPrefix`
     * @throws OaiModelException when an argument is missing or malformed, the dataset
     *         does not exist, or its server state forbids delivery
     */
    protected async handleGetRecord(oaiRequest: OaiParameter) {
        this.xsltParameter["repIdentifier"] = OaiController.REP_IDENTIFIER;

        if (!("identifier" in oaiRequest)) {
            throw new BadOaiModelException("The prefix of the identifier argument is unknown.");
        }
        const dataId = Number(this.getDocumentIdByIdentifier(oaiRequest.identifier));

        const dataset = await Dataset.findOne({
            where: { publish_id: dataId },
            include: ["xmlCache"],
        });
        if (!dataset || !dataset.publish_id) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                "The value of the identifier argument is unknown or illegal in this repository.",
                OaiErrorCodes.IDDOESNOTEXIST,
            );
        }

        if (!("metadataPrefix" in oaiRequest)) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                "The prefix of the metadata argument is unknown.",
                OaiErrorCodes.BADARGUMENT,
            );
        }
        this.xsltParameter["oai_metadataPrefix"] = oaiRequest["metadataPrefix"];

        // do not deliver datasets which are restricted by document state
        if (dataset.server_state == null || !this.deliveringDocumentStates.includes(dataset.server_state)) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                "Document is not available for OAI export!",
                OaiErrorCodes.NORECORDSMATCH,
            );
        }

        const datasetNode = this.xml.root().ele("Datasets");
        await this.createXmlRecord(dataset, datasetNode);
    }

    /**
     * Implements response for OAI-PMH verb 'ListRecords'.
     *
     * @param oaiRequest Contains full request information
     */
    protected async handleListRecords(oaiRequest: OaiParameter) {
        await this.withTokenWorker(() => this.handlingOfLists(oaiRequest, this.configuration.maxListRecs));
    }

    /**
     * Implements response for OAI-PMH verb 'ListIdentifiers'.
     *
     * @param oaiRequest Contains full request information
     */
    protected async handleListIdentifiers(oaiRequest: OaiParameter) {
        await this.withTokenWorker(() => this.handlingOfLists(oaiRequest, this.configuration.maxListIds));
    }

    /**
     * Runs `work` with a connected token worker and closes the worker afterwards,
     * also when `work` fails (list requests fail regularly, e.g. with noRecordsMatch).
     */
    private async withTokenWorker(work: () => Promise<void>): Promise<void> {
        if (!this.tokenWorker) {
            // presumably the token lifetime in seconds (one day) — confirm against TokenWorker
            this.tokenWorker = new TokenWorker(86400);
        }
        if (!this.tokenWorker.Connected) {
            await this.tokenWorker.connect();
        }

        try {
            await work();
        } catch (error) {
            try {
                await this.tokenWorker.close();
            } catch (closeError) {
                // The original failure is the one the client has to see.
                Logger.err(`OAI: closing the token worker failed: ${String(closeError)}`);
            }
            throw error;
        }
        await this.tokenWorker.close();
    }

    /**
     * Shared implementation of the list verbs: determines the matching publish ids
     * (from the request arguments or from a stored resumption token), renders one page
     * of them and stores the remaining ids under a new resumption token.
     *
     * @param oaiRequest query arguments of the request
     * @param maxRecords page size; falls back to 100 when falsy
     * @throws OaiModelException for unknown tokens, invalid arguments or empty results
     */
    private async handlingOfLists(oaiRequest: OaiParameter, maxRecords: number) {
        const pageSize = maxRecords || OaiController.DEFAULT_PAGE_SIZE;
        this.xsltParameter["repIdentifier"] = OaiController.REP_IDENTIFIER;
        const datasetNode = this.xml.root().ele("Datasets");

        let cursor = 0;
        let totalIds = 0;
        let start = pageSize + 1;
        let reldocIds: (number | null)[] = [];
        let metadataPrefix = null;

        if ("resumptionToken" in oaiRequest) {
            const token = await this.tokenWorker.get(oaiRequest["resumptionToken"]);
            if (!token) {
                throw new OaiModelException(StatusCodes.INTERNAL_SERVER_ERROR, "cache is outdated.", OaiErrorCodes.BADRESUMPTIONTOKEN);
            }
            // StartPosition is stored 1-based (see `start` above); the cursor is 0-based.
            cursor = token.StartPosition - 1;
            start = token.StartPosition + pageSize;
            totalIds = token.TotalIds;
            reldocIds = token.DocumentIds;
            metadataPrefix = token.MetadataPrefix;
            this.xsltParameter["oai_metadataPrefix"] = metadataPrefix;
        } else {
            if (!("metadataPrefix" in oaiRequest)) {
                throw new OaiModelException(
                    StatusCodes.INTERNAL_SERVER_ERROR,
                    "The prefix of the metadata argument is unknown.",
                    OaiErrorCodes.BADARGUMENT,
                );
            }
            metadataPrefix = oaiRequest["metadataPrefix"];
            this.xsltParameter["oai_metadataPrefix"] = metadataPrefix;
            reldocIds = await this.findMatchingPublishIds(oaiRequest);
            totalIds = reldocIds.length;
        }

        // Cut the current page off the front; what is left goes into the next token.
        const restIds = reldocIds as number[];
        const workIds = restIds.splice(0, pageSize);
        if (workIds.length === 0) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                "The combination of the given values results in an empty list.",
                OaiErrorCodes.NORECORDSMATCH,
            );
        }

        const datasets: Dataset[] = await Dataset.findAll({
            where: {
                publish_id: {
                    [Sequelize.Op.in]: workIds,
                },
            },
            include: ["xmlCache"],
            order: ["publish_id"],
        });
        for (const dataset of datasets) {
            await this.createXmlRecord(dataset, datasetNode);
        }

        if (restIds.length > 0) {
            const token = new ResumptionToken();
            token.StartPosition = start;
            token.TotalIds = totalIds;
            token.DocumentIds = restIds;
            token.MetadataPrefix = metadataPrefix;
            const res = await this.tokenWorker.set(token);
            this.setParamResumption(res, cursor, totalIds);
        }
    }

    /**
     * Queries the publish ids of all deliverable datasets that match the `set`,
     * `from` and `until` arguments, ordered by publish id.
     */
    private async findMatchingPublishIds(oaiRequest: OaiParameter): Promise<number[]> {
        // Sequelize `where` / `include` fragments; kept loosely typed like the query options they feed.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const includeArray: any[] = [];
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const andArray: any[] = [
            {
                server_state: {
                    [Sequelize.Op.in]: this.deliveringDocumentStates,
                },
            },
        ];

        if ("set" in oaiRequest) {
            const setArray = this.requireStringArgument(oaiRequest, "set").split(":");
            if (setArray[0] === "data-type") {
                if (setArray.length === 2 && setArray[1]) {
                    andArray.push({
                        type: {
                            [Sequelize.Op.eq]: setArray[1],
                        },
                    });
                }
            } else if (setArray[0] === "open_access") {
                const openAccessLicences = ["CC-BY-4.0", "CC-BY-SA-4.0"];
                includeArray.push({
                    model: License,
                    as: "licenses",
                    required: true, // inner join: only datasets that have such a license
                    where: {
                        name: {
                            [Sequelize.Op.in]: openAccessLicences,
                        },
                    },
                });
            }
        }

        const dateFilter = await this.buildDateFilter(oaiRequest);
        if (dateFilter) {
            andArray.push(dateFilter);
        }

        const rows = await Dataset.findAll({
            attributes: ["publish_id"],
            where: andArray,
            order: ["publish_id"],
            include: includeArray,
            raw: true,
        });
        // A missing publish id can never match the later `IN (...)` lookup, so it must not be counted.
        return rows.map((row: Dataset) => row.publish_id).filter((id: unknown): id is number => id != null);
    }

    /**
     * Translates the `from` / `until` arguments into a condition on `server_date_published`.
     *
     * @returns the condition, or `undefined` when neither argument is present
     * @throws OaiModelException (badArgument) for unparsable dates or differing granularities,
     *         (noRecordsMatch) when the range cannot contain any record
     */
    private async buildDateFilter(oaiRequest: OaiParameter): Promise<Record<string, unknown> | undefined> {
        const from = "from" in oaiRequest ? this.requireStringArgument(oaiRequest, "from") : undefined;
        const until = "until" in oaiRequest ? this.requireStringArgument(oaiRequest, "until") : undefined;

        if (from !== undefined && until !== undefined) {
            if (from.length !== until.length) {
                throw new OaiModelException(
                    StatusCodes.INTERNAL_SERVER_ERROR,
                    "The request has different granularities for the from and until parameters.",
                    OaiErrorCodes.BADARGUMENT,
                );
            }
            const fromDate = this.parseDateArgument("from", from);
            const untilDate = this.parseDateArgument("until", until);
            return {
                server_date_published: {
                    [Sequelize.Op.and]: {
                        [Sequelize.Op.gte]: fromDate.format(OaiController.DB_DATE_FORMAT),
                        [Sequelize.Op.lte]: untilDate.format(OaiController.DB_DATE_FORMAT),
                    },
                },
            };
        }

        if (from !== undefined) {
            const fromDate = this.parseDateArgument("from", from);
            if (fromDate.isAfter(dayjs())) {
                throw new OaiModelException(
                    StatusCodes.INTERNAL_SERVER_ERROR,
                    "Given from date is greater than now. The given values results in an empty list.",
                    OaiErrorCodes.NORECORDSMATCH,
                );
            }
            return {
                server_date_published: {
                    [Sequelize.Op.gte]: fromDate.format(OaiController.DB_DATE_FORMAT),
                },
            };
        }

        if (until !== undefined) {
            const untilDate = this.parseDateArgument("until", until);
            const firstPublishedDataset: Dataset | null = await Dataset.earliestPublicationDate();
            if (firstPublishedDataset == null) {
                // Nothing has been published at all, so no range can match.
                throw new OaiModelException(
                    StatusCodes.INTERNAL_SERVER_ERROR,
                    "The combination of the given values results in an empty list.",
                    OaiErrorCodes.NORECORDSMATCH,
                );
            }
            if (dayjs(firstPublishedDataset.server_date_published).isAfter(untilDate)) {
                throw new OaiModelException(
                    StatusCodes.INTERNAL_SERVER_ERROR,
                    "earliestDatestamp is greater than given until date.\nThe given values results in an empty list.",
                    OaiErrorCodes.NORECORDSMATCH,
                );
            }
            return {
                server_date_published: {
                    [Sequelize.Op.lte]: untilDate.format(OaiController.DB_DATE_FORMAT),
                },
            };
        }

        return undefined;
    }

    /**
     * Parses a `from` / `until` value. A value whose hour is 0 is widened to the
     * start (`from`) or end (`until`) of that day.
     *
     * @throws OaiModelException (badArgument) when the value is not a date
     */
    private parseDateArgument(name: "from" | "until", value: string): dayjs.Dayjs {
        const parsed = dayjs(value);
        if (!parsed.isValid()) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                `The value of the ${name} argument is not a valid date.`,
                OaiErrorCodes.BADARGUMENT,
            );
        }
        if (parsed.hour() !== 0) {
            return parsed;
        }
        return name === "from" ? parsed.startOf("day") : parsed.endOf("day");
    }

    /**
     * Reads a query argument that has to be a single string. Repeated or bracketed
     * query parameters arrive as arrays or objects and are rejected here.
     *
     * @throws OaiModelException (badArgument) when the value is not a string
     */
    private requireStringArgument(oaiRequest: OaiParameter, name: string): string {
        const value = oaiRequest[name];
        if (typeof value !== "string") {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                `The value of the ${name} argument is illegal.`,
                OaiErrorCodes.BADARGUMENT,
            );
        }
        return value;
    }

    /**
     * Set parameters for resumptionToken-line.
     *
     * @param res      value of the resumptionToken
     * @param cursor   value of the cursor
     * @param totalIds value of the total Ids
     */
    private setParamResumption(res: string, cursor: number, totalIds: number) {
        // Announced expiry: one day from now.
        this.xsltParameter["dateDelete"] = dayjs().add(1, "day").format(OaiController.OAI_DATE_FORMAT);
        this.xsltParameter["res"] = res;
        this.xsltParameter["cursor"] = cursor;
        this.xsltParameter["totalIds"] = totalIds;
    }

    /** Appends a `SetSpec` child carrying `information` in its `Value` attribute. */
    private addSpecInformation(domNode: XMLBuilder, information: string) {
        domNode.ele("SetSpec").att("Value", information);
    }

    /** Sets the `landingpage` attribute to `https://<domain>/dataset/<dataid>`. */
    private addLandingPageAttribute(domNode: XMLBuilder, dataid: string) {
        const baseDomain = process.env.BASE_DOMAIN || "localhost";
        const url = "https://" + this.getDomain(baseDomain) + "/dataset/" + dataid;
        domNode.att("landingpage", url);
    }

    /**
     * Reduces a host name to its registrable part and strips a leading protocol:
     * `www.tethys.at` becomes `tethys.at`, while `tethys.co.at` is kept because its
     * middle label is at most three characters long. Deeper names are reduced label
     * by label. IPv4 literals are returned unchanged apart from the protocol.
     *
     * @param host host name, optionally with protocol
     * @returns lower-cased domain without protocol
     */
    private getDomain(host: string): string {
        const protocolPattern = /^.*:\/\//i;
        let myHost = host.trim().toLocaleLowerCase();

        // An IPv4 literal has three dots but no sub-domain that could be dropped.
        if (/^(?:.*:\/\/)?\d{1,3}(?:\.\d{1,3}){3}(?::\d+)?$/.test(myHost)) {
            return myHost.replace(protocolPattern, "");
        }

        const labels = myHost.split(".");
        const dotCount = labels.length - 1;
        // `split(".", 2)` would cut the remainder off, so the tail is re-joined instead.
        const withoutFirstLabel = labels.slice(1).join(".");
        if (dotCount === 2) {
            if (labels[1].length > 3) {
                myHost = withoutFirstLabel;
            }
        } else if (dotCount > 2) {
            myHost = this.getDomain(withoutFirstLabel);
        }
        return myHost.replace(protocolPattern, "");
    }

    /**
     * Extracts the numeric dataset id from an identifier of the form `oai:<repository>:<id>`.
     *
     * @param oaiIdentifier value of the `identifier` argument
     * @returns the id part as a string of digits
     * @throws OaiModelException (badArgument) when the argument is not a single string,
     *         (idDoesNotExist) when the third part is missing or not numeric
     */
    private getDocumentIdByIdentifier(oaiIdentifier: string): string {
        // Query values are untyped at runtime; arrays and objects have no usable `split`.
        if (typeof oaiIdentifier !== "string") {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                "The value of the identifier argument is illegal.",
                OaiErrorCodes.BADARGUMENT,
            );
        }
        const dataId = oaiIdentifier.split(":")[2];
        // Without this check `Number(dataId)` would hand NaN to the database lookup.
        if (!dataId || !/^\d+$/.test(dataId)) {
            throw new OaiModelException(
                StatusCodes.INTERNAL_SERVER_ERROR,
                "The value of the identifier argument is unknown or illegal in this repository.",
                OaiErrorCodes.IDDOESNOTEXIST,
            );
        }
        return dataId;
    }

    /**
     * Builds the XML node of `dataset`, decorates it with landing page and
     * `data-type:<type>` set information and imports it into `datasetNode`.
     */
    private async createXmlRecord(dataset: Dataset, datasetNode: XMLBuilder) {
        const domNode = await this.getDatasetXmlDomNode(dataset);
        if (dataset.publish_id) {
            this.addLandingPageAttribute(domNode, dataset.publish_id.toString());
        }
        this.addSpecInformation(domNode, "data-type:" + dataset.type);
        datasetNode.import(domNode);
    }

    /**
     * Produces the XML document of a dataset through `XmlModel`, with caching enabled
     * and the dataset's own xml cache attached when it has one.
     */
    private async getDatasetXmlDomNode(dataset: Dataset) {
        const xmlModel = new XmlModel(dataset);
        xmlModel.excludeEmptyFields();
        if (dataset.xmlCache) {
            xmlModel.setXmlCache = dataset.xmlCache;
        }
        xmlModel.caching = true;
        return await xmlModel.getDomDocument();
    }

    /**
     * Collects one set per project label (`project:<label>`). Labels containing
     * characters outside `sampleRegEx` are logged and skipped.
     */
    private async getSetsForProjects(): Promise<IDictionary> {
        const sets: IDictionary = {};
        const projects: Project[] = await Project.findAll({
            attributes: ["label"],
            raw: true,
        });
        for (const project of projects) {
            if (!preg_match(this.sampleRegEx, project.label)) {
                Logger.err(
                    `OAI: Invalid SetSpec (project='${project.label}').\nAllowed characters are [${this.sampleRegEx}].`,
                );
                continue;
            }
            sets["project:" + project.label] = `Set for project '${project.label}'`;
        }
        return sets;
    }

    /**
     * Collects one set per type of the published datasets (`data-type:<type>`).
     * Datasets without a type are ignored; types containing characters outside
     * `sampleRegEx` are logged and skipped.
     */
    private async getSetsForDatasetTypes(): Promise<IDictionary> {
        const sets: IDictionary = {};
        const datasets: Dataset[] = await Dataset.findAll({
            attributes: ["type"],
            where: { server_state: { [Sequelize.Op.eq]: "published" } },
        });
        for (const dataset of datasets) {
            if (!dataset.type) {
                // Would otherwise be advertised as the set "data-type:null".
                continue;
            }
            if (!preg_match(this.sampleRegEx, dataset.type)) {
                Logger.err(
                    `OAI: Invalid SetSpec (data-type='${dataset.type}').\nAllowed characters are [${this.sampleRegEx}].`,
                );
                continue;
            }
            sets["data-type:" + dataset.type] = `Set for document type '${dataset.type}'`;
        }
        return sets;
    }

    /** Reports an unsupported verb through the OAI error parameters (`badVerb`). */
    private handleIllegalVerb() {
        this.xsltParameter["oai_error_code"] = "badVerb";
        this.xsltParameter["oai_error_message"] = "The verb provided in the request is illegal.";
    }
}