- added doi registration
Some checks failed
CI Pipeline / japa-tests (push) Failing after 57s

- npm updates for webpack-encore and postcss-loader
- DatasetExtension.ts: use relation contributors for PersonContributor
- added DoiClient.ts and DoiClientContract.ts
- routes.ts: added routes for creating and storing DOI identifier
- added XSLT doi_datacite.xslt needed for registering DOI identifier
This commit is contained in:
Kaimbacher 2024-01-26 09:39:03 +01:00
parent ebc62d9117
commit c9ba7d6adc
22 changed files with 1836 additions and 677 deletions

View file

@ -2,6 +2,7 @@ import type { HttpContextContract } from '@ioc:Adonis/Core/HttpContext';
import { Client } from '@opensearch-project/opensearch';
import User from 'App/Models/User';
import Dataset from 'App/Models/Dataset';
import DatasetIdentifier from 'App/Models/DatasetIdentifier';
import XmlModel from 'App/Library/XmlModel';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces';
import { create } from 'xmlbuilder2';
@ -10,6 +11,12 @@ import { transform } from 'saxon-js';
import type { ModelQueryBuilderContract } from '@ioc:Adonis/Lucid/Orm';
import { schema, CustomMessages } from '@ioc:Adonis/Core/Validator';
import { DateTime } from 'luxon';
import Index from 'App/Library/Utils/Index';
import { getDomain } from 'App/Utils/utility-functions';
import { DoiClient } from 'App/Library/Doi/DoiClient';
import DoiClientException from 'App/Exceptions/DoiClientException';
import Logger from '@ioc:Adonis/Core/Logger';
import { HttpException } from 'node-exceptions';
// Create a new instance of the client
const client = new Client({ node: 'http://localhost:9200' }); // replace with your OpenSearch endpoint
@ -66,6 +73,7 @@ export default class DatasetsController {
.where('editor_id', user.id)
.doesntHave('identifier', 'and');
})
// .preload('identifier')
.preload('titles')
.preload('user', (query) => query.select('id', 'login'))
.preload('editor', (query) => query.select('id', 'login'))
@ -273,13 +281,79 @@ export default class DatasetsController {
dataset.publisher_name = publisherName;
if (await dataset.save()) {
const index_name = 'tethys-records';
await Index.indexDocument(dataset, index_name);
return response.toRoute('editor.dataset.list').flash('message', 'You have successfully published the dataset!');
}
}
public async create({}: HttpContextContract) {}
/**
 * Renders the 'Editor/Dataset/Doi' page for a single dataset.
 *
 * The dataset is looked up by the `id` route parameter and loaded together
 * with its titles, descriptions and authors. `firstOrFail()` is used, so the
 * call rejects when no matching dataset exists.
 */
public async doiCreate({ request, inertia }: HttpContextContract) {
    const datasetId = request.param('id');

    // The 'identifier' relation is intentionally not preloaded here.
    const datasetQuery = Dataset.query()
        .where('id', datasetId)
        .preload('titles')
        .preload('descriptions')
        .preload('authors');
    const dataset = await datasetQuery.firstOrFail();

    const pageProps = { dataset };
    return inertia.render('Editor/Dataset/Doi', pageProps);
}
public async store({}: HttpContextContract) {}
/**
 * Registers a DOI at DataCite for the dataset addressed by the `publish_id`
 * route parameter and stores the DOI as the dataset's identifier.
 *
 * Steps:
 *  1. Load the dataset (`firstOrFail()` rejects when it does not exist).
 *  2. Resolve DOI prefix and base domain for the configured DataCite environment.
 *  3. Create the DataCite metadata XML and register the DOI through `DoiClient`.
 *  4. On HTTP 201, save a `DatasetIdentifier` (type 'doi', status 'findable')
 *     and re-index the dataset, then redirect to the editor dataset list.
 *
 * @throws DoiClientException when prefix/base domain are not configured or
 *         DataCite answers with a status other than 201.
 * @throws HttpException when the metadata XML could not be created or when
 *         saving the identifier / indexing fails.
 */
public async doiStore({ request, response }: HttpContextContract) {
    const dataId = request.param('publish_id');
    const dataset = await Dataset.query()
        // .preload('xmlCache')
        .where('publish_id', dataId)
        .firstOrFail();

    let prefix = '';
    let base_domain = '';
    const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
    if (datacite_environment === 'debug') {
        prefix = process.env.DATACITE_TEST_PREFIX || '';
        base_domain = process.env.TEST_BASE_DOMAIN || '';
    } else if (datacite_environment === 'production') {
        prefix = process.env.DATACITE_PREFIX || '';
        base_domain = process.env.BASE_DOMAIN || '';
    }
    // Without a prefix the DOI would be '/tethys.<id>' and without a base domain the
    // landing page URL would be malformed, so stop before anything is sent to DataCite.
    if (prefix === '' || base_domain === '') {
        const message = `Missing DOI prefix or base domain configuration for DataCite environment '${datacite_environment}'`;
        Logger.error(message);
        throw new DoiClientException(400, message);
    }

    // getDoiRegisterString is declared as Promise<string | undefined>; never register
    // a DOI with missing metadata.
    const xmlMeta = await Index.getDoiRegisterString(dataset);
    if (!xmlMeta) {
        const message = `Could not create DataCite metadata XML for dataset ${dataset.id}`;
        Logger.error(message);
        throw new HttpException(message);
    }

    // register DOI:
    const doiValue = prefix + '/tethys.' + dataset.publish_id; //'10.21388/tethys.213'
    const landingPageUrl = 'https://doi.' + getDomain(base_domain) + '/' + prefix + '/tethys.' + dataset.publish_id; //https://doi.dev.tethys.at/10.21388/tethys.213
    const doiClient = new DoiClient();
    const dataciteResponse = await doiClient.registerDoi(doiValue, xmlMeta, landingPageUrl);

    if (dataciteResponse?.status !== 201) {
        const message = `Unexpected DataCite MDS response code ${dataciteResponse?.status}`;
        throw new DoiClientException(dataciteResponse?.status, message);
    }

    // response OK 201: save the identifier value into the db
    const doiIdentifier = new DatasetIdentifier();
    doiIdentifier.value = doiValue;
    doiIdentifier.dataset_id = dataset.id;
    doiIdentifier.type = 'doi';
    doiIdentifier.status = 'findable';
    // set the modified date of the dataset for re-caching the model and updating the search index
    dataset.server_date_modified = DateTime.now();

    // save the identifier through the dataset relation and index the dataset to OpenSearch
    try {
        await dataset.related('identifier').save(doiIdentifier);
        const index_name = 'tethys-records';
        await Index.indexDocument(dataset, index_name);
    } catch (error) {
        Logger.error(`${__filename}: Indexing document ${dataset.id} failed: ${error.message}`);
        throw new HttpException(error.message);
    }

    return response.toRoute('editor.dataset.list').flash('message', 'You have successfully created a DOI for the dataset!');
}
public async show({}: HttpContextContract) {}
@ -404,8 +478,6 @@ export default class DatasetsController {
public async destroy({}: HttpContextContract) {}
public async syncOpensearch({}: HttpContextContract) {}
private async createXmlRecord(dataset: Dataset, datasetNode: XMLBuilder) {
const domNode = await this.getDatasetXmlDomNode(dataset);
if (domNode) {

View file

@ -0,0 +1,110 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<root>
<Dataset>
<Rdr_Dataset Id="306" PublisherName="GeoSphere Austria" PublishId="213"
CreatingCorporation="Tethys RDR" Language="en" ServerState="published"
Type="measurementdata">
<CreatedAt Year="2023" Month="11" Day="30" Hour="10" Minute="20" Second="58"
UnixTimestamp="1701336058" Timezone="Europe/Berlin" />
<ServerDateModified Year="2024" Month="1" Day="22" Hour="12" Minute="28" Second="17"
UnixTimestamp="1705922897" Timezone="Europe/Berlin" />
<ServerDatePublished Year="2024" Month="1" Day="22" Hour="12" Minute="28" Second="8"
UnixTimestamp="1705922888" Timezone="Europe/Berlin" />
<TitleMain Id="682" DocumentId="306" Type="Main" Value="rewerewr" Language="en" />
<TitleAbstract Id="1017" DocumentId="306" Type="Abstract" Value="rewrewr" Language="en" />
<Licence Id="1" Active="true"
LinkLicence="https://creativecommons.org/licenses/by/4.0/deed.en"
LinkLogo="https://licensebuttons.net/l/by/4.0/88x31.png"
NameLong="Creative Commons Attribution 4.0 International (CC BY 4.0)"
Name="CC-BY-4.0" SortOrder="1" />
<PersonAuthor Id="1" Email="m.moser@univie.ac.at" FirstName="Michael" LastName="Moser"
Status="true" NameType="Personal" Role="author" SortOrder="1"
AllowEmailContact="false" />
<PersonContributor Id="28" Email="juergen.reitner@geologie.ac.at" FirstName="Jürgen"
LastName="Reitner" Status="false" NameType="Personal" Role="contributor"
SortOrder="1" AllowEmailContact="false" />
<Subject Id="143" Language="de" Type="Geoera" Value="Aletshausen-Langenneufnach Störung"
CreatedAt="2023-11-21 17:17:43" UpdatedAt="2023-11-21 17:17:43" />
<Subject Id="164" Language="de" Type="Geoera" Value="Wolfersberg-Moosach Störung"
ExternalKey="https://data.geoscience.earth/ncl/geoera/hotLime/faults/3503"
CreatedAt="2023-11-30 10:20:58" UpdatedAt="2023-11-30 10:20:58" />
<Subject Id="165" Language="en" Type="Uncontrolled" Value="wefwef"
CreatedAt="2023-11-30 10:20:58" UpdatedAt="2023-11-30 10:20:58" />
<File Id="1037" DocumentId="306" PathName="files/306/file-clpkzkkgq0001nds14fua5um6.png"
Label="freieIP.png" MimeType="image/png" FileSize="112237" VisibleInFrontdoor="true"
VisibleInOai="true" SortOrder="0" CreatedAt="2023-11-30 10:21:14"
UpdatedAt="2023-11-30 10:21:14" />
<Coverage Id="284" DatasetId="306" XMin="11.71142578125" XMax="14.414062500000002"
YMin="46.58906908309185" YMax="47.45780853075031" CreatedAt="2023-11-30 10:20:58"
UpdatedAt="2023-11-30 10:20:58" />
<Collection Id="21" RoleId="3" Number="551" Name="Geology, hydrology, meteorology"
ParentId="20" Visible="true" VisiblePublish="true" />
</Rdr_Dataset>
</Dataset>
</root>
<?xml version="1.0" encoding="utf-8"?>
<resource xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://datacite.org/schema/kernel-4"
xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd">
<identifier identifierType="DOI">10.21388/tethys.213</identifier>
<creators>
<creator>
<creatorName nameType="Personal">Moser, Michael</creatorName>
<givenName>Michael</givenName>
<familyName>Moser</familyName>
<affiliation>GBA</affiliation>
</creator>
</creators>
<titles>
<title xml:lang="en">rewerewr</title>
</titles>
<publisher>Tethys RDR</publisher>
<publicationYear>2024</publicationYear>
<subjects>
<subject xml:lang="de">Aletshausen-Langenneufnach Störung</subject>
<subject xml:lang="de">Wolfersberg-Moosach Störung</subject>
<subject xml:lang="en">wefwef</subject>
</subjects>
<language>en</language>
<contributors>
<contributor contributorType="RegistrationAuthority">
<contributorName>Jürgen Reitner</contributorName>
</contributor>
</contributors>
<dates>
<date dateType="Created">2023-11-30</date>
</dates>
<version>1</version>
<resourceType resourceTypeGeneral="Dataset">Dataset</resourceType>
<alternateIdentifiers>
<alternateIdentifier alternateIdentifierType="url">https://www.tethys.at/dataset/213</alternateIdentifier>
</alternateIdentifiers>
<rightsList>
<rights xml:lang="" rightsURI="https://creativecommons.org/licenses/by/4.0/deed.en"
schemeURI="https://spdx.org/licenses/" rightsIdentifierScheme="SPDX"
rightsIdentifier="CC-BY-4.0">Creative Commons Attribution 4.0 International (CC BY 4.0)</rights>
<rights rightsURI="info:eu-repo/semantics/openAccess">Open Access</rights>
</rightsList>
<sizes>
<size>1 datasets</size>
</sizes>
<formats>
<format>image/png</format>
</formats>
<descriptions>
<description xml:lang="en" descriptionType="Abstract">rewrewr</description>
</descriptions>
<geoLocations>
<geoLocation>
<geoLocationBox>
<westBoundLongitude>11.71142578125</westBoundLongitude>
<eastBoundLongitude>14.414062500000002</eastBoundLongitude>
<southBoundLatitude>46.58906908309185</southBoundLatitude>
<northBoundLatitude>47.45780853075031</northBoundLatitude>
</geoLocationBox>
</geoLocation>
</geoLocations>
</resource>

View file

@ -0,0 +1,12 @@
/**
 * Error raised when talking to the DOI registration service fails or when the
 * DOI client is not configured correctly.
 *
 * `status` carries the HTTP status code associated with the failure.
 */
class DoiClientException extends Error {
    public status: number;

    /**
     * @param status HTTP status code describing the failure
     * @param message human readable description of the failure
     */
    constructor(status: number, message: string) {
        // Error already stores `message`; no need to redeclare or reassign it.
        super(message);
        // Without this, logs and stack traces show the generic name 'Error'.
        this.name = 'DoiClientException';
        this.status = status;
    }
}
export default DoiClientException;

View file

@ -0,0 +1,119 @@
// import { Client } from 'guzzle';
// import { Log } from '@adonisjs/core/build/standalone';
// import { DoiInterface } from './interfaces/DoiInterface';
import DoiClientContract from 'App/Library/Doi/DoiClientContract';
import DoiClientException from 'App/Exceptions/DoiClientException';
import { StatusCodes } from 'http-status-codes';
import Logger from '@ioc:Adonis/Core/Logger';
import axios, {AxiosResponse} from 'axios';
/**
 * Client for registering DOIs through the DataCite MDS HTTP API.
 *
 * Credentials and service URL are read from environment variables. Which set
 * is used depends on DATACITE_ENVIRONMENT ('debug', the default, or 'production').
 */
export class DoiClient implements DoiClientContract {
    username: string;
    password: string;
    serviceUrl: string;

    /**
     * @throws DoiClientException (400) when username, password or service URL
     *         are not configured, including an unknown DATACITE_ENVIRONMENT value.
     */
    constructor() {
        // Start from empty values so an unrecognised environment is reported as
        // missing configuration instead of leaving the fields undefined.
        this.username = '';
        this.password = '';
        this.serviceUrl = '';

        const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
        if (datacite_environment === 'debug') {
            this.username = process.env.DATACITE_TEST_USERNAME || '';
            this.password = process.env.DATACITE_TEST_PASSWORD || '';
            this.serviceUrl = process.env.DATACITE_TEST_SERVICE_URL || '';
        } else if (datacite_environment === 'production') {
            this.username = process.env.DATACITE_USERNAME || '';
            this.password = process.env.DATACITE_PASSWORD || '';
            this.serviceUrl = process.env.DATACITE_SERVICE_URL || '';
        }

        if (this.username === '' || this.password === '' || this.serviceUrl === '') {
            const message = 'Missing configuration settings to properly initialize DOI client';
            Logger.error(message);
            throw new DoiClientException(StatusCodes.BAD_REQUEST, message);
        }
    }

    /**
     * Creates a DOI with the given identifier.
     *
     * Step 1 uploads the metadata XML (PUT {serviceUrl}/metadata/{doi}),
     * step 2 registers the DOI name with its landing page URL (PUT {serviceUrl}/doi/{doi}).
     *
     * @param doiValue The desired DOI identifier e.g. '10.5072/tethys.999'
     * @param xmlMeta The metadata XML sent as the body of step 1
     * @param landingPageUrl e.g. https://www.tethys.at/dataset/1
     *
     * @returns the axios response of the second request (DOI registration)
     * @throws DoiClientException when a request fails or answers with a status other than 201
     */
    public async registerDoi(doiValue: string, xmlMeta: string, landingPageUrl: string): Promise<AxiosResponse<any>> {
        // step 1: register metadata via xml upload
        // Response Codes
        // 201 Created: operation successful
        // 401 Unauthorised: no login
        // 403 Forbidden: login problem, quota exceeded
        // 415 Wrong Content Type : Not including content type in the header.
        // 422 Unprocessable Entity : invalid XML
        const metadataUrl = `${this.serviceUrl}/metadata/${doiValue}`; //https://mds.test.datacite.org/metadata/10.21388/tethys.213
        await this.putExpectingCreated(metadataUrl, xmlMeta, 'application/xml;charset=UTF-8');

        // step 2: register the DOI name together with the landing page URL of the dataset
        // Response Codes
        // 201 Created: operation successful
        // 400 Bad Request: request body must be exactly two lines: DOI and URL; wrong domain, wrong prefix;
        // 401 Unauthorised: no login
        // 403 Forbidden: login problem, quota exceeded
        // 412 Precondition failed: metadata must be uploaded first.
        const doiUrl = `${this.serviceUrl}/doi/${doiValue}`; //'https://mds.test.datacite.org/doi/10.21388/tethys.213'
        const data = `doi=${doiValue}\nurl=${landingPageUrl}`;
        return this.putExpectingCreated(doiUrl, data, 'text/plain;charset=UTF-8');
    }

    /**
     * Sends one authenticated PUT request and turns every failure into a DoiClientException.
     *
     * The exception status is the HTTP status of the service's answer when one was
     * received, otherwise 503 (no response at all).
     */
    private async putExpectingCreated(url: string, body: string, contentType: string): Promise<AxiosResponse<any>> {
        let response: AxiosResponse<any>;
        try {
            response = await axios.put(url, body, {
                auth: {
                    username: this.username,
                    password: this.password,
                },
                headers: {
                    'Content-Type': contentType,
                },
            });
        } catch (error) {
            // Only read the status from the error of THIS request; a response of an
            // earlier, successful request must never leak into the exception.
            const failure = error as { message?: string; response?: { status?: number } } | null | undefined;
            const status = failure?.response?.status ?? StatusCodes.SERVICE_UNAVAILABLE;
            const message = `request to ${url} failed with ${failure?.message}`;
            Logger.error(message);
            throw new DoiClientException(status, message);
        }

        if (response.status !== 201) {
            const message = 'unexpected DataCite MDS response code ' + response.status;
            Logger.error(message);
            throw new DoiClientException(response.status, message);
        }
        return response;
    }
}

View file

@ -0,0 +1,12 @@
// import ResumptionToken from './ResumptionToken';
/**
 * Contract for a client that registers DOI identifiers at a remote service.
 */
export default interface DoiClientContract {
// user name used to authenticate against the service
username: string;
// password used to authenticate against the service
password: string;
// base URL of the service
serviceUrl: string;
// prefix: string;
// base_domain: string;
/**
 * Registers the DOI `doiValue` using the metadata `xmlMeta` and the landing page `landingPageUrl`.
 * NOTE(review): no return type is declared here, so callers typed against this
 * contract get an implicit `any` - consider declaring it explicitly.
 */
registerDoi(doiValue: string, xmlMeta: string, landingPageUrl: string);
// get(key: string): Promise<ResumptionToken | null>;
// set(token: ResumptionToken): Promise<string>;
}

View file

@ -110,7 +110,7 @@ export default class Strategy {
private mapModelAttributes(myObject, childNode: XMLBuilder) {
Object.keys(myObject).forEach((prop) => {
let value = myObject[prop];
console.log(`${prop}: ${value}`);
// console.log(`${prop}: ${value}`);
if (value != null) {
if (value instanceof DateTime) {
value = value.toFormat('yyyy-MM-dd HH:mm:ss').trim();

194
app/Library/Utils/Index.ts Normal file
View file

@ -0,0 +1,194 @@
import Dataset from 'App/Models/Dataset';
import { Client } from '@opensearch-project/opensearch';
import { create } from 'xmlbuilder2';
import { transform } from 'saxon-js';
import XmlModel from 'App/Library/XmlModel';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces';
import Logger from '@ioc:Adonis/Core/Logger';
import { readFileSync } from 'fs';
import { DateTime } from 'luxon';
// import Config from '@ioc:Adonis/Core/Config';
import { getDomain } from 'App/Utils/utility-functions';
// const opensearchNode = process.env.OPENSEARCH_HOST || 'localhost';
// const client = new Client({ node: `http://${opensearchNode}` }); // replace with your OpenSearch endpoint
/**
 * Extracts a printable reason from a caught value, which is not guaranteed to be an Error.
 */
const describeError = (error: unknown): string => {
    const reason = (error as { message?: unknown } | null | undefined)?.message;
    return typeof reason === 'string' ? reason : String(error);
};

/**
 * Helpers for turning a dataset into XML, transforming it with compiled XSLT
 * stylesheets (saxon-js) and indexing the result in OpenSearch.
 */
export default {
    // OpenSearch client; the host is taken from OPENSEARCH_HOST and defaults to 'localhost'
    client: new Client({ node: `http://${process.env.OPENSEARCH_HOST || 'localhost'}` }),

    /**
     * Creates the DataCite registration XML for a dataset.
     *
     * The dataset is serialized to XML and transformed with the stylesheet
     * 'public/assets2/doi_datacite.sef.json'. The stylesheet receives the current
     * unix timestamp, the DOI prefix of the configured DataCite environment and
     * the repository identifier 'tethys'.
     *
     * @param dataset the dataset to create the registration XML for
     * @returns the serialized XML, or undefined when any step fails (the failure is logged)
     */
    async getDoiRegisterString(dataset: Dataset): Promise<string | undefined> {
        try {
            const proc = readFileSync('public/assets2/doi_datacite.sef.json');

            const xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
            const datasetNode = xml.root().ele('Dataset');
            await createXmlRecord(dataset, datasetNode);
            const xmlString = xml.end({ prettyPrint: false });

            // the DOI prefix depends on the DataCite environment; it stays empty for unknown environments
            let prefix = '';
            const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
            if (datacite_environment === 'debug') {
                prefix = process.env.DATACITE_TEST_PREFIX || '';
            } else if (datacite_environment === 'production') {
                prefix = process.env.DATACITE_PREFIX || '';
            }

            const xsltParameter = {
                unixTimestamp: DateTime.now().toUnixInteger(),
                prefix,
                repIdentifier: 'tethys',
            };

            const result = await transform({
                stylesheetText: proc,
                destination: 'serialized',
                sourceText: xmlString,
                stylesheetParams: xsltParameter,
            });
            return result.principalResult;
        } catch (error) {
            Logger.error(
                `An error occurred while creating the DOI registration XML for dataset with publish_id ${dataset.publish_id}: ${describeError(error)}`,
            );
            return undefined;
        }
    },

    /**
     * Transforms a dataset with 'public/assets2/solr.sef.json' and indexes the
     * resulting JSON document under the dataset's publish_id.
     *
     * Best effort: failures are logged and not rethrown.
     *
     * @param dataset the dataset to index
     * @param index_name name of the OpenSearch index
     */
    async indexDocument(dataset: Dataset, index_name: string): Promise<void> {
        try {
            const proc = readFileSync('public/assets2/solr.sef.json');
            const doc: string = await this.getTransformedString(dataset, proc);
            // getTransformedString returns '' when the transformation failed (already logged there)
            if (doc === '') {
                Logger.error(`dataset with publish_id ${dataset.publish_id} was not indexed: transformation returned no document`);
                return;
            }
            const document = JSON.parse(doc);
            await this.client.index({
                id: dataset.publish_id?.toString(),
                index: index_name,
                body: document,
                refresh: true,
            });
            Logger.info(`dataset with publish_id ${dataset.publish_id} successfully indexed`);
        } catch (error) {
            Logger.error(`An error occurred while indexing dataset with publish_id ${dataset.publish_id}: ${describeError(error)}`);
        }
    },

    /**
     * Serializes a dataset to XML and transforms it with the given compiled stylesheet.
     *
     * @param dataset the dataset to transform
     * @param proc content of the compiled stylesheet, passed to saxon-js as `stylesheetText`
     * @returns the serialized transformation result, or '' when the transformation fails
     */
    async getTransformedString(dataset, proc): Promise<string> {
        const xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
        const datasetNode = xml.root().ele('Dataset');
        await createXmlRecord(dataset, datasetNode);
        const xmlString = xml.end({ prettyPrint: false });

        try {
            const result = await transform({
                stylesheetText: proc,
                destination: 'serialized',
                sourceText: xmlString,
            });
            return result.principalResult;
        } catch (error) {
            Logger.error(`An error occurred while transforming the dataset XML, error: ${describeError(error)}`);
            return '';
        }
    },
};
/**
 * Earlier standalone versions of the indexing helpers (`indexDocument`,
 * `getJsonString`), kept commented out for reference only.
 */
// export const indexDocument = async (dataset: Dataset, index_name: string, proc: Buffer): Promise<void> => {
// try {
// const doc = await getJsonString(dataset, proc);
// let document = JSON.parse(doc);
// await client.index({
// id: dataset.publish_id?.toString(),
// index: index_name,
// body: document,
// refresh: true,
// });
// Logger.info(`dataset with publish_id ${dataset.publish_id} successfully indexed`);
// } catch (error) {
// Logger.error(`An error occurred while indexing datsaet with publish_id ${dataset.publish_id}.`);
// }
// };
// const getJsonString = async (dataset, proc): Promise<string> => {
// let xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
// const datasetNode = xml.root().ele('Dataset');
// await createXmlRecord(dataset, datasetNode);
// const xmlString = xml.end({ prettyPrint: false });
// try {
// const result = await transform({
// stylesheetText: proc,
// destination: 'serialized',
// sourceText: xmlString,
// });
// return result.principalResult;
// } catch (error) {
// Logger.error(`An error occurred while creating the user, error: ${error.message},`);
// return '';
// }
// };
/**
 * Builds the XML node of a dataset and imports it into `datasetNode`.
 *
 * When a node could be created it is enriched with the landing page attribute
 * (only if the dataset has a publish_id), a 'data-type:<type>' SetSpec element
 * and one SetSpec element per collection ('<oai_name>:<number>').
 * Nothing is imported when no node is available.
 */
const createXmlRecord = async (dataset: Dataset, datasetNode: XMLBuilder): Promise<void> => {
    const domNode = await getDatasetXmlDomNode(dataset);
    if (!domNode) {
        return;
    }

    // add frontdoor url and data-type
    if (dataset.publish_id) {
        addLandingPageAttribute(domNode, dataset.publish_id.toString());
    }
    addSpecInformation(domNode, `data-type:${dataset.type}`);

    for (const collection of dataset.collections || []) {
        const role = collection.collectionRole;
        addSpecInformation(domNode, `${role.oai_name}:${collection.number}`);
    }

    datasetNode.import(domNode);
};
/**
 * Creates the XML DOM node for a dataset via `XmlModel`.
 *
 * Empty fields are excluded and caching is switched on. The dataset's
 * 'xmlCache' relation is loaded and, when present, handed to the model
 * before the DOM document is requested.
 *
 * @returns the DOM document produced by `XmlModel.getDomDocument()`, which may be null
 */
const getDatasetXmlDomNode = async (dataset: Dataset): Promise<XMLBuilder | null> => {
    const serializer = new XmlModel(dataset);
    serializer.excludeEmptyFields();
    serializer.caching = true;

    await dataset.load('xmlCache');
    if (dataset.xmlCache) {
        serializer.xmlCache = dataset.xmlCache;
    }

    return serializer.getDomDocument();
};
/**
 * Sets the 'landingpage' attribute of the dataset XML element to
 * 'https://<domain>/dataset/<dataid>', where the domain is derived from
 * OAI_BASE_DOMAIN (default 'localhost') through `getDomain`.
 */
const addLandingPageAttribute = (domNode: XMLBuilder, dataid: string) => {
    const host = getDomain(process.env.OAI_BASE_DOMAIN || 'localhost');
    const landingPageUrl = `https://${host}/dataset/${dataid}`;
    // add the attribute to the dataset xml element
    domNode.att('landingpage', landingPageUrl);
};
/**
 * Appends a 'SetSpec' child element to `domNode` whose 'Value' attribute
 * holds the given information string.
 */
const addSpecInformation = (domNode: XMLBuilder, information: string) => {
    const setSpecNode = domNode.ele('SetSpec');
    setSpecNode.att('Value', information);
};

View file

@ -20,6 +20,9 @@ export default class DatasetIdentifier extends BaseModel {
@column({})
public type: string;
@column({})
public status: string;
@column({})
public value: string;
@ -38,4 +41,9 @@ export default class DatasetIdentifier extends BaseModel {
foreignKey: 'dataset_id',
})
public dataset: BelongsTo<typeof Dataset>;
// // Specify the relationships to touch when this model is updated
// public static get touches() {
// return ['dataset'];
// }
}

View file

@ -82,7 +82,7 @@ export default abstract class DatasetExtension extends LucidBaseModel {
sort_order: 'sort_order',
allow_email_contact: 'allow_email_contact',
},
relation: 'persons',
relation: 'contributors',
fetch: 'eager',
},
Reference: {