tethys.backend/app/Library/Utils/Index.ts
Arno Kaimbacher b5bbe26ec2
Some checks failed
build.yaml / feat: Enhance background job settings UI and functionality (push) Failing after 0s
feat: Enhance background job settings UI and functionality
- Updated BackgroundJob.vue to improve the display of background job statuses, including missing cross-references and current job mode.
- Added auto-refresh functionality for background job status.
- Introduced success toast notifications for successful status refreshes.
- Modified the XML serialization process in DatasetXmlSerializer for better caching and performance.
- Implemented a new RuleProvider for managing custom validation rules.
- Improved error handling in routes for loading background job settings.
- Enhanced ClamScan configuration with socket support for virus scanning.
- Refactored dayjs utility to streamline locale management.
2025-10-14 12:19:09 +02:00

195 lines
7.3 KiB
TypeScript

import Dataset from '#models/dataset';
import { Client } from '@opensearch-project/opensearch';
import { create } from 'xmlbuilder2';
import SaxonJS from 'saxon-js';
import DatasetXmlSerializer from '#app/Library/DatasetXmlSerializer';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import logger from '@adonisjs/core/services/logger';
import { readFileSync } from 'fs';
import { DateTime } from 'luxon';
// import Config from '@ioc:Adonis/Core/Config';
import { getDomain } from '#app/utils/utility-functions';
// const opensearchNode = process.env.OPENSEARCH_HOST || 'localhost';
// const client = new Client({ node: `http://${opensearchNode}` }); // replace with your OpenSearch endpoint
/**
 * Bag of named parameters handed to an XSLT stylesheet.
 * Keys are the stylesheet parameter names; values are passed through untyped.
 */
interface XslTParameter {
    [paramName: string]: any;
}
export default {
    /**
     * OpenSearch client shared by the indexing helpers below. The node host is read from
     * `OPENSEARCH_HOST` and falls back to `localhost`.
     */
    client: new Client({ node: `http://${process.env.OPENSEARCH_HOST || 'localhost'}` }),

    /**
     * Build the DOI registration XML for a dataset by running its XML record through the
     * `doi_datacite` stylesheet.
     *
     * @param dataset - dataset whose XML record is transformed
     * @returns the serialized transformation result, or `undefined` when any step fails
     *          (the failure is logged, not thrown)
     */
    async getDoiRegisterString(dataset: Dataset): Promise<string | undefined> {
        try {
            const proc = readFileSync('public/assets2/doi_datacite.sef.json');

            // Built inline (not via this.generateDatasetXml) so this method does not depend on
            // `this` and keeps working when called detached from the exported object.
            const xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
            const datasetNode = xml.root().ele('Dataset');
            await createXmlRecord(dataset, datasetNode);
            const xmlString = xml.end({ prettyPrint: false });

            const xsltParameter: XslTParameter = {
                unixTimestamp: DateTime.now().toUnixInteger(),
                // The prefix always comes from DATACITE_PREFIX; no separate test prefix is applied.
                prefix: process.env.DATACITE_PREFIX || '',
                repIdentifier: 'tethys',
            };

            const result = await SaxonJS.transform({
                stylesheetText: proc,
                destination: 'serialized',
                sourceText: xmlString,
                stylesheetParams: xsltParameter,
            });
            return result.principalResult;
        } catch (error) {
            // Put the cause into the message itself: extra logger arguments without a format
            // placeholder are not rendered.
            const message = error instanceof Error ? error.message : String(error);
            logger.error(
                `Failed to generate the DOI registration XML for dataset with publish_id ${dataset.publish_id}: ${message}`,
            );
            return undefined;
        }
    },

    /**
     * Index a dataset document to OpenSearch/Elasticsearch.
     *
     * The dataset is transformed to a JSON document with the `solr` stylesheet and written to
     * `index_name` under the dataset's `publish_id`.
     *
     * @param dataset - dataset to index
     * @param index_name - name of the target index
     * @throws rethrows any failure (file read, transformation, JSON parsing, index request)
     *         after logging it, so the caller can handle it
     */
    async indexDocument(dataset: Dataset, index_name: string): Promise<void> {
        try {
            // Load XSLT transformation file
            const xsltProc = readFileSync('public/assets2/solr.sef.json');

            // Transform dataset to JSON document
            const jsonDoc: string = await this.getTransformedString(dataset, xsltProc);
            if (!jsonDoc) {
                // getTransformedString signals a failed transformation with an empty string;
                // report that directly instead of letting JSON.parse fail on empty input.
                throw new Error('XSLT transformation produced no JSON document');
            }
            const document = JSON.parse(jsonDoc);

            // Index document to OpenSearch with the document JSON as body
            // NOTE(review): when publish_id is unset the id is undefined; confirm that letting
            // the search backend pick an id is intended.
            await this.client.index({
                id: dataset.publish_id?.toString(),
                index: index_name,
                body: document,
                refresh: true, // make immediately searchable
            });
            logger.info(`Dataset ${dataset.publish_id} successfully indexed to ${index_name}`);
        } catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            logger.error(`Failed to index dataset ${dataset.publish_id}: ${message}`);
            throw error; // Re-throw to allow caller to handle
        }
    },

    /**
     * Transform dataset XML to a string using the given XSLT stylesheet.
     *
     * @param dataset - dataset whose XML record is the transformation source
     * @param proc - stylesheet contents, as read from a `.sef.json` file
     * @returns the serialized transformation result, or an empty string when the
     *          transformation throws (the failure is logged)
     */
    async getTransformedString(dataset: Dataset, proc: Buffer): Promise<string> {
        // Generate XML string from dataset
        const xmlString = await this.generateDatasetXml(dataset);
        try {
            // Apply XSLT transformation
            const result = await SaxonJS.transform({
                stylesheetText: proc,
                destination: 'serialized',
                sourceText: xmlString,
            });
            return result.principalResult;
        } catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            logger.error(`XSLT transformation failed for dataset ${dataset.publish_id}: ${message}`);
            return '';
        }
    },

    /**
     * Generate the XML string for a dataset model: a `<root>` document containing one
     * `Dataset` element into which the dataset's XML record is imported.
     *
     * @param dataset - dataset to serialize
     * @returns the XML document as a string, not pretty-printed
     */
    async generateDatasetXml(dataset: Dataset): Promise<string> {
        const xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
        const datasetNode = xml.root().ele('Dataset');
        await createXmlRecord(dataset, datasetNode);
        return xml.end({ prettyPrint: false });
    },
};
/**
 * Build the complete XML record for a dataset and import it into `datasetNode`.
 *
 * The dataset's XML document comes from `getDatasetXmlDomNode` and is then enriched with:
 *  - a `landingpage` attribute, only when the dataset has a `publish_id`;
 *  - a `data-type:<type>` SetSpec entry;
 *  - one `<oai_name>:<number>` SetSpec entry per collection, when collections are present.
 *
 * @throws Error when no XML document could be produced for the dataset
 */
const createXmlRecord = async (dataset: Dataset, datasetNode: XMLBuilder): Promise<void> => {
    const recordNode = await getDatasetXmlDomNode(dataset);
    if (!recordNode) {
        throw new Error(`Failed to generate XML DOM node for dataset ${dataset.id}`);
    }

    // Landing page URL is only meaningful for published datasets
    if (dataset.publish_id) {
        addLandingPageAttribute(recordNode, String(dataset.publish_id));
    }

    // Data type specification
    addSpecInformation(recordNode, `data-type:${dataset.type}`);

    // Collection membership, one entry per collection
    for (const collection of dataset.collections || []) {
        const role = collection.collectionRole;
        addSpecInformation(recordNode, `${role.oai_name}:${collection.number}`);
    }

    datasetNode.import(recordNode);
};
/**
 * Produce the XML document for a dataset through `DatasetXmlSerializer`.
 *
 * The serializer is configured with caching enabled and empty fields excluded. The dataset's
 * `xmlCache` relation is loaded and, when present, handed to the serializer before the
 * document is requested.
 *
 * @returns the serializer's XML document, or `null` when it yields none
 */
const getDatasetXmlDomNode = async (dataset: Dataset): Promise<XMLBuilder | null> => {
    const xmlSerializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();

    // Load the cache relation so an existing cache entry can be passed to the serializer
    await dataset.load('xmlCache');
    const cacheEntry = dataset.xmlCache;
    if (cacheEntry) {
        xmlSerializer.setCache(cacheEntry);
    }

    // Generate or retrieve the cached DOM document
    return xmlSerializer.toXmlDocument();
};
/**
 * Set the `landingpage` attribute on the dataset XML element.
 *
 * The URL has the form `https://<domain>/dataset/<dataid>`, where the domain is derived by
 * `getDomain` from `OAI_BASE_DOMAIN` (falling back to `localhost`).
 */
const addLandingPageAttribute = (domNode: XMLBuilder, dataid: string) => {
    const host = getDomain(process.env.OAI_BASE_DOMAIN || 'localhost');
    // add the attribute to the dataset xml element
    domNode.att('landingpage', `https://${host}/dataset/${dataid}`);
};
/**
 * Append a `SetSpec` child element to `domNode` and store `information` in its `Value`
 * attribute.
 */
const addSpecInformation = (domNode: XMLBuilder, information: string) => {
    const setSpecNode = domNode.ele('SetSpec');
    setSpecNode.att('Value', information);
};