import Dataset from '#models/dataset';
import { Client } from '@opensearch-project/opensearch';
import { create } from 'xmlbuilder2';
import SaxonJS from 'saxon-js';
import DatasetXmlSerializer from '#app/Library/DatasetXmlSerializer';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import logger from '@adonisjs/core/services/logger';
import { readFileSync } from 'fs';
import { DateTime } from 'luxon';
// import Config from '@ioc:Adonis/Core/Config';
import { getDomain } from '#app/utils/utility-functions';

// const opensearchNode = process.env.OPENSEARCH_HOST || 'localhost';
// const client = new Client({ node: `http://${opensearchNode}` }); // replace with your OpenSearch endpoint

/**
 * Name/value map handed to the XSLT processor as stylesheet parameters.
 */
interface XslTParameter {
    [key: string]: any;
}

export default {
    // opensearchNode: process.env.OPENSEARCH_HOST || 'localhost',
    /** OpenSearch client; the host is read from OPENSEARCH_HOST and defaults to localhost. */
    client: new Client({ node: `http://${process.env.OPENSEARCH_HOST || 'localhost'}` }), // replace with your OpenSearch endpoint

    /**
     * Build the XML string used for DOI registration by transforming the
     * dataset XML with the `doi_datacite.sef.json` stylesheet.
     *
     * Failures are logged and reported to the caller as `undefined`; this
     * method does not throw.
     *
     * @param dataset - dataset to serialize and transform
     * @returns the serialized transformation result, or `undefined` on failure
     */
    async getDoiRegisterString(dataset: Dataset): Promise<string | undefined> {
        try {
            const proc = readFileSync('public/assets2/doi_datacite.sef.json');
            const xmlString = await buildDatasetXml(dataset);

            // let base_domain = '';
            // const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
            // if (datacite_environment === 'debug') {
            //     prefix = process.env.DATACITE_TEST_PREFIX || '';
            //     base_domain = process.env.TEST_BASE_DOMAIN || '';
            // } else if (datacite_environment === 'production') {
            //     prefix = process.env.DATACITE_PREFIX || '';
            //     base_domain = process.env.BASE_DOMAIN || '';
            // }
            const xsltParameter: XslTParameter = {
                // current time as Unix timestamp (seconds)
                unixTimestamp: DateTime.now().toUnixInteger(),
                prefix: process.env.DATACITE_PREFIX || '',
                repIdentifier: 'tethys',
            };

            try {
                const result = await SaxonJS.transform({
                    // stylesheetFileName: `${config.TMP_BASE_DIR}/data-quality/rules/iati.sef.json`,
                    stylesheetText: proc,
                    destination: 'serialized',
                    // sourceFileName: sourceFile,
                    sourceText: xmlString,
                    stylesheetParams: xsltParameter,
                    // logLevel: 10,
                });
                return result.principalResult;
            } catch (error) {
                // The error text is part of the message itself so it cannot be
                // dropped by the logger's argument interpolation.
                logger.error(
                    `XSLT transformation of the DOI registration XML failed for dataset with publish_id ${dataset.publish_id}: ${describeError(error)}`,
                );
                return undefined;
            }
        } catch (error) {
            logger.error(
                `An error occurred while creating the DOI registration XML for dataset with publish_id ${dataset.publish_id}: ${describeError(error)}`,
            );
            return undefined;
        }
    },

    /**
     * Index a dataset document to OpenSearch/Elasticsearch.
     *
     * @param dataset - dataset to index; its `publish_id` is used as document id
     * @param index_name - name of the target index
     * @throws rethrows any error raised while transforming or indexing, after logging it
     */
    async indexDocument(dataset: Dataset, index_name: string): Promise<void> {
        try {
            // Load XSLT transformation file
            const xsltProc = readFileSync('public/assets2/solr.sef.json');

            // Transform dataset to JSON document
            const jsonDoc: string = await this.getTransformedString(dataset, xsltProc);
            if (!jsonDoc) {
                // getTransformedString signals a failed transformation with ''.
                // Fail with a clear message instead of an opaque JSON.parse error.
                throw new Error('XSLT transformation produced no JSON document');
            }
            const document = JSON.parse(jsonDoc);

            // Index document to OpenSearch with document json body
            await this.client.index({
                id: dataset.publish_id?.toString(),
                index: index_name,
                body: document,
                refresh: true, // make immediately searchable
            });

            logger.info(`Dataset ${dataset.publish_id} successfully indexed to ${index_name}`);
        } catch (error) {
            logger.error(`Failed to index dataset ${dataset.publish_id}: ${describeError(error)}`);
            throw error; // Re-throw to allow caller to handle
        }
    },

    /**
     * Transform the dataset XML with the given compiled XSLT stylesheet.
     *
     * Errors while generating the dataset XML propagate to the caller;
     * errors of the transformation itself are logged and reported as ''.
     *
     * @param dataset - dataset to serialize and transform
     * @param proc - content of the compiled stylesheet (`*.sef.json`)
     * @returns the serialized transformation result, or '' if the transformation failed
     */
    async getTransformedString(dataset: Dataset, proc: Buffer): Promise<string> {
        // Generate XML string from dataset
        const xmlString = await this.generateDatasetXml(dataset);

        try {
            // Apply XSLT transformation
            const result = await SaxonJS.transform({
                stylesheetText: proc,
                destination: 'serialized',
                sourceText: xmlString,
            });
            return result.principalResult;
        } catch (error) {
            logger.error(
                `An error occurred during the XSLT transformation of dataset ${dataset.publish_id}: ${describeError(error)}`,
            );
            return '';
        }
    },

    /**
     * Generate XML string from dataset model.
     *
     * @param dataset - dataset to serialize
     * @returns the serialized (non pretty-printed) XML document
     * @throws if no XML DOM node can be generated for the dataset
     */
    async generateDatasetXml(dataset: Dataset): Promise<string> {
        return buildDatasetXml(dataset);
    },
};

/**
 * Extract a printable message from a value caught in a `catch` clause.
 */
const describeError = (error: unknown): string => (error instanceof Error ? error.message : String(error));

/**
 * Serialize a dataset to an XML string with a `Dataset` element below the
 * document root. Shared by the DOI and the indexing code paths.
 */
const buildDatasetXml = async (dataset: Dataset): Promise<string> => {
    // The document needs a root element so that `Dataset` can be appended to it.
    // NOTE(review): wrapper element name `root` is assumed — confirm that the
    // XSLT stylesheets address the dataset below this element.
    const xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
    const datasetNode = xml.root().ele('Dataset');
    await createXmlRecord(dataset, datasetNode);
    return xml.end({ prettyPrint: false });
};

/**
 * Create complete XML record for dataset.
 * Enriches the serialized dataset node with the landing page URL and
 * `SetSpec` entries and imports it into `datasetNode`.
 *
 * @throws if the serializer returns no DOM node
 */
const createXmlRecord = async (dataset: Dataset, datasetNode: XMLBuilder): Promise<void> => {
    const domNode = await getDatasetXmlDomNode(dataset);

    if (!domNode) {
        throw new Error(`Failed to generate XML DOM node for dataset ${dataset.id}`);
    }

    // Enrich with landing page URL
    if (dataset.publish_id) {
        addLandingPageAttribute(domNode, dataset.publish_id.toString());
    }

    // Add data type specification
    addSpecInformation(domNode, `data-type:${dataset.type}`);

    // Add collection information
    if (dataset.collections) {
        for (const coll of dataset.collections) {
            const collRole = coll.collectionRole;
            addSpecInformation(domNode, `${collRole.oai_name}:${coll.number}`);
        }
    }

    datasetNode.import(domNode);
};

/**
 * Serialize the dataset with `DatasetXmlSerializer`, handing it the
 * dataset's `xmlCache` relation when one exists.
 *
 * @returns the XML document produced by the serializer, or `null`
 */
const getDatasetXmlDomNode = async (dataset: Dataset): Promise<XMLBuilder | null> => {
    const serializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();
    // xmlModel.setModel(dataset);

    // Load the cache relationship before handing it to the serializer
    await dataset.load('xmlCache');
    if (dataset.xmlCache) {
        serializer.setCache(dataset.xmlCache);
    }

    // Generate or retrieve cached DOM document
    const xmlDocument: XMLBuilder | null = await serializer.toXmlDocument();
    return xmlDocument;
};

/**
 * Set the `landingpage` attribute of the dataset node to the public
 * dataset URL built from OAI_BASE_DOMAIN (default: localhost).
 */
const addLandingPageAttribute = (domNode: XMLBuilder, dataid: string) => {
    const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
    const url = 'https://' + getDomain(baseDomain) + '/dataset/' + dataid;
    // add attribute to the dataset xml element
    domNode.att('landingpage', url);
};

/**
 * Append a `SetSpec` child element carrying `information` in its `Value` attribute.
 */
const addSpecInformation = (domNode: XMLBuilder, information: string) => {
    domNode.ele('SetSpec').att('Value', information);
};