feat: Enhance background job settings UI and functionality
Some checks failed
build.yaml / feat: Enhance background job settings UI and functionality (push) Failing after 0s

- Updated BackgroundJob.vue to improve the display of background job statuses, including missing cross-references and current job mode.
- Added auto-refresh functionality for background job status.
- Introduced success toast notifications for successful status refreshes.
- Modified the XML serialization process in DatasetXmlSerializer for better caching and performance.
- Implemented a new RuleProvider for managing custom validation rules.
- Improved error handling in routes for loading background job settings.
- Enhanced ClamScan configuration with socket support for virus scanning.
- Refactored dayjs utility to streamline locale management.
This commit is contained in:
Kaimbacher 2025-10-14 12:19:09 +02:00
commit b5bbe26ec2
27 changed files with 1221 additions and 603 deletions

View file

@ -1,6 +1,7 @@
import type { HttpContext } from '@adonisjs/core/http';
import Dataset from '#models/dataset';
import { StatusCodes } from 'http-status-codes';
import DatasetReference from '#models/dataset_reference';
// node ace make:controller Author
export default class DatasetController {
@ -81,11 +82,11 @@ export default class DatasetController {
.preload('licenses')
.preload('references')
.preload('project')
.preload('referenced_by', (builder) => {
builder.preload('dataset', (builder) => {
builder.preload('identifier');
});
})
// .preload('referenced_by', (builder) => {
// builder.preload('dataset', (builder) => {
// builder.preload('identifier');
// });
// })
.preload('files', (builder) => {
builder.preload('hashvalues');
})
@ -98,7 +99,17 @@ export default class DatasetController {
});
}
return response.status(StatusCodes.OK).json(dataset);
// Build the version chain
const versionChain = await this.buildVersionChain(dataset);
// Add version chain to response
const responseData = {
...dataset.toJSON(),
versionChain: versionChain,
};
// return response.status(StatusCodes.OK).json(dataset);
return response.status(StatusCodes.OK).json(responseData);
} catch (error) {
return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
message: error.message || `Error retrieving Dataset with publish_id=${params.publish_id}.`,
@ -159,11 +170,11 @@ export default class DatasetController {
.preload('licenses')
.preload('references')
.preload('project')
.preload('referenced_by', (builder) => {
builder.preload('dataset', (builder) => {
builder.preload('identifier');
});
})
// .preload('referenced_by', (builder) => {
// builder.preload('dataset', (builder) => {
// builder.preload('identifier');
// });
// })
.preload('files', (builder) => {
builder.preload('hashvalues');
})
@ -175,12 +186,139 @@ export default class DatasetController {
message: `Cannot find Dataset with identifier=${identifierValue}.`,
});
}
// Build the version chain
const versionChain = await this.buildVersionChain(dataset);
return response.status(StatusCodes.OK).json(dataset);
// Add version chain to response
const responseData = {
...dataset.toJSON(),
versionChain: versionChain,
};
// return response.status(StatusCodes.OK).json(dataset);
return response.status(StatusCodes.OK).json(responseData);
} catch (error) {
return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
message: error.message || `Error retrieving Dataset with identifier=${identifierValue}.`,
});
}
}
/**
 * Assemble the version history surrounding a dataset.
 *
 * The result describes the dataset itself (`current`) together with every
 * older version (`previousVersions`) and every newer version
 * (`newerVersions`) reachable through its version references.
 *
 * @param dataset - dataset the chain is built around; `identifier` and
 *   `mainTitle` are read from it and reported as `null` when falsy
 * @returns object with `current`, `previousVersions` and `newerVersions`
 */
private async buildVersionChain(dataset: Dataset) {
    // Summary of the dataset the chain is centred on
    const current = {
        id: dataset.id,
        publish_id: dataset.publish_id,
        doi: dataset.identifier?.value || null,
        main_title: dataset.mainTitle || null,
        server_date_published: dataset.server_date_published,
    };

    // Walk backwards in time first, then forwards
    const previousVersions: any[] = await this.getPreviousVersions(dataset.id);
    const newerVersions: any[] = await this.getNewerVersions(dataset.id);

    return { current, previousVersions, newerVersions };
}
/**
 * Recursively collect all previous versions of a dataset.
 *
 * Follows `IsNewVersionOf` references starting at `datasetId` and walks the
 * chain depth-first, so each predecessor is followed by its own predecessors.
 *
 * @param datasetId - id of the dataset whose predecessors are collected
 * @param visited - ids already expanded during this traversal; shared by all
 *   recursive calls so that cyclic or converging references terminate
 * @returns one summary object per distinct previous version
 */
private async getPreviousVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
    // Prevent infinite loops
    if (visited.has(datasetId)) {
        return [];
    }
    visited.add(datasetId);

    const previousVersions: any[] = [];

    // Find references where this dataset "IsNewVersionOf" another dataset
    const previousRefs = await DatasetReference.query()
        .where('document_id', datasetId)
        .where('relation', 'IsNewVersionOf')
        .whereNotNull('related_document_id');

    for (const ref of previousRefs) {
        if (!ref.related_document_id) continue;

        const previousDataset = await Dataset.query()
            .where('id', ref.related_document_id)
            .preload('identifier')
            .preload('titles')
            .first();

        // A dataset that was already visited is either the starting dataset
        // (reference cycle) or one reached through another branch; listing it
        // again would report the dataset as its own predecessor or duplicate
        // an entry.
        if (!previousDataset || visited.has(previousDataset.id)) continue;

        previousVersions.push({
            id: previousDataset.id,
            publish_id: previousDataset.publish_id,
            doi: previousDataset.identifier?.value || null,
            main_title: previousDataset.mainTitle || null,
            server_date_published: previousDataset.server_date_published,
            relation: 'IsPreviousVersionOf', // From perspective of current dataset
        });

        // Recursively get even older versions
        const olderVersions = await this.getPreviousVersions(previousDataset.id, visited);
        previousVersions.push(...olderVersions);
    }

    return previousVersions;
}
/**
 * Recursively collect all newer versions of a dataset.
 *
 * Follows `IsPreviousVersionOf` references starting at `datasetId` and walks
 * the chain depth-first, so each successor is followed by its own successors.
 *
 * @param datasetId - id of the dataset whose successors are collected
 * @param visited - ids already expanded during this traversal; shared by all
 *   recursive calls so that cyclic or converging references terminate
 * @returns one summary object per distinct newer version
 */
private async getNewerVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
    // Prevent infinite loops
    if (visited.has(datasetId)) {
        return [];
    }
    visited.add(datasetId);

    const newerVersions: any[] = [];

    // Find references where this dataset "IsPreviousVersionOf" another dataset
    const newerRefs = await DatasetReference.query()
        .where('document_id', datasetId)
        .where('relation', 'IsPreviousVersionOf')
        .whereNotNull('related_document_id');

    for (const ref of newerRefs) {
        if (!ref.related_document_id) continue;

        const newerDataset = await Dataset.query()
            .where('id', ref.related_document_id)
            .preload('identifier')
            .preload('titles')
            .first();

        // A dataset that was already visited is either the starting dataset
        // (reference cycle) or one reached through another branch; listing it
        // again would report the dataset as its own successor or duplicate
        // an entry.
        if (!newerDataset || visited.has(newerDataset.id)) continue;

        newerVersions.push({
            id: newerDataset.id,
            publish_id: newerDataset.publish_id,
            doi: newerDataset.identifier?.value || null,
            main_title: newerDataset.mainTitle || null,
            server_date_published: newerDataset.server_date_published,
            relation: 'IsNewVersionOf', // From perspective of current dataset
        });

        // Recursively get even newer versions
        const evenNewerVersions = await this.getNewerVersions(newerDataset.id, visited);
        newerVersions.push(...evenNewerVersions);
    }

    return newerVersions;
}
}

View file

@ -25,10 +25,10 @@ export default class FileController {
const dataset = file.dataset;
// Files from unpublished datasets are now blocked
if (dataset.server_state !== 'published') {
return response.status(StatusCodes.FORBIDDEN).send({
message: `File access denied: Dataset is not published.`,
});
}
return response.status(StatusCodes.FORBIDDEN).send({
message: `File access denied: Dataset is not published.`,
});
}
if (dataset && this.isUnderEmbargo(dataset.embargo_date)) {
return response.status(StatusCodes.FORBIDDEN).send({
message: `File is under embargo until ${dataset.embargo_date?.toFormat('yyyy-MM-dd')}`,
@ -36,12 +36,26 @@ export default class FileController {
}
// Proceed with file download
const filePath = '/storage/app/data/' + file.pathName;
const filePath = '/storage/app/data/' + file.pathName;
const fileExt = file.filePath.split('.').pop() || '';
// const fileName = file.label + fileExt;
const fileName = file.label.toLowerCase().endsWith(`.${fileExt.toLowerCase()}`)
? file.label
: `${file.label}.${fileExt}`;
// const fileName = file.label + fileExt;
const fileName = file.label.toLowerCase().endsWith(`.${fileExt.toLowerCase()}`) ? file.label : `${file.label}.${fileExt}`;
/**
 * Decide whether a file may be rendered inline by the browser instead of
 * being offered as a download.
 *
 * The MIME type comes from uploaded file metadata, so it is treated as
 * untrusted: types the browser would execute as an active document are never
 * previewed inline.
 *
 * @param mimeType - media type of the file, optionally with parameters
 *   (e.g. `text/plain; charset=utf-8`); matching is case-insensitive
 * @returns `true` when the file can be shown inline, `false` when it must be
 *   sent as an attachment
 */
const canPreviewInline = (mimeType: string): boolean => {
    // Match on the bare media type: drop parameters and surrounding whitespace
    const type = (mimeType.split(';', 1)[0] ?? '').trim().toLowerCase();

    // HTML and SVG run embedded script when opened as a top-level document,
    // which would execute in this application's origin.
    const scriptableTypes = ['text/html', 'image/svg+xml'];
    if (scriptableTypes.includes(type)) {
        return false;
    }

    return (
        type === 'application/pdf' ||
        type.startsWith('image/') ||
        type.startsWith('text/') ||
        type === 'application/json' ||
        // NOTE(review): XML documents can embed XHTML <script> elements;
        // confirm that inline XML preview is really wanted.
        type === 'application/xml' ||
        type.startsWith('video/') ||
        type.startsWith('audio/')
    );
};
const disposition = canPreviewInline(file.mimeType) ? 'inline' : 'attachment';
try {
fs.accessSync(filePath, fs.constants.R_OK); //| fs.constants.W_OK);
@ -51,7 +65,7 @@ export default class FileController {
.header('Cache-Control', 'no-cache private')
.header('Content-Description', 'File Transfer')
.header('Content-Type', file.mimeType)
.header('Content-Disposition', 'inline; filename=' + fileName)
.header('Content-Disposition', `${disposition}; filename="${fileName}"`)
.header('Content-Transfer-Encoding', 'binary')
.header('Access-Control-Allow-Origin', '*')
.header('Access-Control-Allow-Methods', 'GET');

View file

@ -3,7 +3,7 @@ import { Client } from '@opensearch-project/opensearch';
import User from '#models/user';
import Dataset from '#models/dataset';
import DatasetIdentifier from '#models/dataset_identifier';
import XmlModel from '#app/Library/XmlModel';
import DatasetXmlSerializer from '#app/Library/DatasetXmlSerializer';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import { create } from 'xmlbuilder2';
import { readFileSync } from 'fs';
@ -574,55 +574,88 @@ export default class DatasetsController {
public async doiStore({ request, response }: HttpContext) {
const dataId = request.param('publish_id');
const dataset = await Dataset.query()
// .preload('xmlCache')
.where('publish_id', dataId)
.firstOrFail();
// Load dataset with minimal required relationships
const dataset = await Dataset.query().where('publish_id', dataId).firstOrFail();
const prefix = process.env.DATACITE_PREFIX || '';
const base_domain = process.env.BASE_DOMAIN || '';
// Generate DOI metadata XML
const xmlMeta = (await Index.getDoiRegisterString(dataset)) as string;
let prefix = '';
let base_domain = '';
// const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
prefix = process.env.DATACITE_PREFIX || '';
base_domain = process.env.BASE_DOMAIN || '';
// Prepare DOI registration data
const doiValue = `${prefix}/tethys.${dataset.publish_id}`; //'10.21388/tethys.213'
const landingPageUrl = `https://doi.${getDomain(base_domain)}/${prefix}/tethys.${dataset.publish_id}`; //https://doi.dev.tethys.at/10.21388/tethys.213
// register DOI:
const doiValue = prefix + '/tethys.' + dataset.publish_id; //'10.21388/tethys.213'
const landingPageUrl = 'https://doi.' + getDomain(base_domain) + '/' + prefix + '/tethys.' + dataset.publish_id; //https://doi.dev.tethys.at/10.21388/tethys.213
// Register DOI with DataCite
const doiClient = new DoiClient();
const dataciteResponse = await doiClient.registerDoi(doiValue, xmlMeta, landingPageUrl);
if (dataciteResponse?.status === 201) {
// if response OK 201; save the Identifier value into db
const doiIdentifier = new DatasetIdentifier();
doiIdentifier.value = doiValue;
doiIdentifier.dataset_id = dataset.id;
doiIdentifier.type = 'doi';
doiIdentifier.status = 'findable';
// save updated dataset to db an index to OpenSearch
try {
// save modified date of datset for re-caching model in db an update the search index
dataset.server_date_modified = DateTime.now();
// autoUpdate: true only triggers when dataset.save() is called, not when saving a related model like below
await dataset.save();
await dataset.related('identifier').save(doiIdentifier);
const index_name = 'tethys-records';
await Index.indexDocument(dataset, index_name);
} catch (error) {
logger.error(`${__filename}: Indexing document ${dataset.id} failed: ${error.message}`);
// Log the error or handle it as needed
throw new HttpException(error.message);
}
return response.toRoute('editor.dataset.list').flash('message', 'You have successfully created a DOI for the dataset!');
} else {
if (dataciteResponse?.status !== 201) {
const message = `Unexpected DataCite MDS response code ${dataciteResponse?.status}`;
// Log the error or handle it as needed
throw new DoiClientException(dataciteResponse?.status, message);
}
// DOI registration successful - persist and index
try {
// Save identifier
await this.persistDoiAndIndex(dataset, doiValue);
return response.toRoute('editor.dataset.list').flash('message', 'You have successfully created a DOI for the dataset!');
} catch (error) {
logger.error(`${__filename}: Failed to persist DOI and index dataset ${dataset.id}: ${error.message}`);
throw new HttpException(error.message);
}
// return response.toRoute('editor.dataset.list').flash('message', xmlMeta);
}
/**
* Persist DOI identifier and update search index
* Handles cache invalidation to ensure fresh indexing
*
* Steps, in order: store a new `DatasetIdentifier` (type `doi`, status
* `findable`) through the dataset's `identifier` relation, bump
* `server_date_modified` on the dataset, drop the dataset's XML cache,
* reload the dataset and hand it to `Index.indexDocument`.
*
* Nothing is caught here, so a failure in any step is propagated to the
* caller. The steps are not wrapped in a transaction in this method, so
* writes made before a failing step remain in place.
*
* @param dataset - dataset that received the DOI
* @param doiValue - DOI string stored as the identifier value
*/
private async persistDoiAndIndex(dataset: Dataset, doiValue: string): Promise<void> {
// Create DOI identifier
const doiIdentifier = new DatasetIdentifier();
doiIdentifier.value = doiValue;
doiIdentifier.dataset_id = dataset.id;
doiIdentifier.type = 'doi';
doiIdentifier.status = 'findable';
// Save identifier (this will trigger database insert)
await dataset.related('identifier').save(doiIdentifier);
// Update dataset modification timestamp to reflect the change
dataset.server_date_modified = DateTime.now();
await dataset.save();
// Invalidate stale XML cache
await this.invalidateDatasetCache(dataset);
// Reload dataset with fresh state for indexing
// (identifier and xmlCache are preloaded; firstOrFail rejects if the row is gone)
const freshDataset = await Dataset.query().where('id', dataset.id).preload('identifier').preload('xmlCache').firstOrFail();
// Index to OpenSearch with fresh data
// Index name comes from OPENSEARCH_INDEX, falling back to 'tethys-records'
const index_name = process.env.OPENSEARCH_INDEX || 'tethys-records';
await Index.indexDocument(freshDataset, index_name);
logger.info(`Successfully created DOI ${doiValue} and indexed dataset ${dataset.id}`);
}
/**
 * Remove the stored XML cache entry of a dataset, if there is one.
 *
 * Loads the `xmlCache` relation first; when no entry exists nothing is
 * deleted and nothing is logged.
 *
 * @param dataset - dataset whose cached XML should be discarded
 */
private async invalidateDatasetCache(dataset: Dataset): Promise<void> {
    await dataset.load('xmlCache');

    const cacheEntry = dataset.xmlCache;
    if (!cacheEntry) {
        return;
    }

    await cacheEntry.delete();
    logger.debug(`Invalidated XML cache for dataset ${dataset.id}`);
}
public async show({}: HttpContext) {}
public async edit({ request, inertia, response }: HttpContext) {
@ -1124,14 +1157,14 @@ export default class DatasetsController {
// Set the response headers and download the file
response
.header('Cache-Control', 'no-cache private')
.header('Content-Description', 'File Transfer')
.header('Content-Type', file.mime_type || 'application/octet-stream')
// .header('Content-Disposition', 'inline; filename=' + fileName)
.header('Content-Transfer-Encoding', 'binary')
.header('Access-Control-Allow-Origin', '*')
.header('Access-Control-Allow-Methods', 'GET');
response.attachment(fileName);
.header('Cache-Control', 'no-cache private')
.header('Content-Description', 'File Transfer')
.header('Content-Type', file.mime_type || 'application/octet-stream')
// .header('Content-Disposition', 'inline; filename=' + fileName)
.header('Content-Transfer-Encoding', 'binary')
.header('Access-Control-Allow-Origin', '*')
.header('Access-Control-Allow-Methods', 'GET');
response.attachment(fileName);
return response.download(filePath);
}
@ -1144,19 +1177,18 @@ export default class DatasetsController {
}
}
private async getDatasetXmlDomNode(dataset: Dataset) {
const xmlModel = new XmlModel(dataset);
private async getDatasetXmlDomNode(dataset: Dataset): Promise<XMLBuilder | null> {
const serializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();
// xmlModel.setModel(dataset);
xmlModel.excludeEmptyFields();
xmlModel.caching = true;
// const cache = dataset.xmlCache ? dataset.xmlCache : null;
// dataset.load('xmlCache');
// Load existing cache if available
await dataset.load('xmlCache');
if (dataset.xmlCache) {
xmlModel.xmlCache = dataset.xmlCache;
serializer.setCache(dataset.xmlCache);
}
// return cache.getDomDocument();
const domDocument: XMLBuilder | null = await xmlModel.getDomDocument();
return domDocument;
const xmlDocument : XMLBuilder | null = await serializer.toXmlDocument();
return xmlDocument;
}
}

View file

@ -15,7 +15,7 @@ import { OaiModelException, BadOaiModelException } from '#app/exceptions/OaiMode
import Dataset from '#models/dataset';
import Collection from '#models/collection';
import { getDomain, preg_match } from '#app/utils/utility-functions';
import XmlModel from '#app/Library/XmlModel';
import DatasetXmlSerializer from '#app/Library/DatasetXmlSerializer';
import logger from '@adonisjs/core/services/logger';
import ResumptionToken from '#app/Library/Oai/ResumptionToken';
// import Config from '@ioc:Adonis/Core/Config';
@ -292,7 +292,7 @@ export default class OaiController {
this.xsltParameter['repIdentifier'] = repIdentifier;
const datasetNode = this.xml.root().ele('Datasets');
const paginationParams: PagingParameter ={
const paginationParams: PagingParameter = {
cursor: 0,
totalLength: 0,
start: maxRecords + 1,
@ -333,7 +333,7 @@ export default class OaiController {
}
private async handleNoResumptionToken(oaiRequest: Dictionary, paginationParams: PagingParameter, maxRecords: number) {
this.validateMetadataPrefix(oaiRequest, paginationParams);
this.validateMetadataPrefix(oaiRequest, paginationParams);
const finder: ModelQueryBuilderContract<typeof Dataset, Dataset> = Dataset.query().whereIn(
'server_state',
this.deliveringDocumentStates,
@ -347,16 +347,20 @@ export default class OaiController {
finder: ModelQueryBuilderContract<typeof Dataset, Dataset>,
paginationParams: PagingParameter,
oaiRequest: Dictionary,
maxRecords: number
maxRecords: number,
) {
const totalResult = await finder
.clone()
.count('* as total')
.first()
.then((res) => res?.$extras.total);
paginationParams.totalLength = Number(totalResult);
paginationParams.totalLength = Number(totalResult);
const combinedRecords: Dataset[] = await finder.select('publish_id').orderBy('publish_id').offset(0).limit(maxRecords*2);
const combinedRecords: Dataset[] = await finder
.select('publish_id')
.orderBy('publish_id')
.offset(0)
.limit(maxRecords * 2);
paginationParams.activeWorkIds = combinedRecords.slice(0, 100).map((dat) => Number(dat.publish_id));
paginationParams.nextDocIds = combinedRecords.slice(100).map((dat) => Number(dat.publish_id));
@ -602,19 +606,17 @@ export default class OaiController {
}
private async getDatasetXmlDomNode(dataset: Dataset) {
const xmlModel = new XmlModel(dataset);
// xmlModel.setModel(dataset);
xmlModel.excludeEmptyFields();
xmlModel.caching = true;
const serializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();
// const cache = dataset.xmlCache ? dataset.xmlCache : null;
// dataset.load('xmlCache');
if (dataset.xmlCache) {
xmlModel.xmlCache = dataset.xmlCache;
serializer.setCache(dataset.xmlCache);
}
// return cache.getDomDocument();
const domDocument: XMLBuilder | null = await xmlModel.getDomDocument();
return domDocument;
// return cache.toXmlDocument();
const xmlDocument: XMLBuilder | null = await serializer.toXmlDocument();
return xmlDocument;
}
private addSpecInformation(domNode: XMLBuilder, information: string) {

View file

@ -0,0 +1,231 @@
import DocumentXmlCache from '#models/DocumentXmlCache';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import Dataset from '#models/dataset';
import Strategy from './Strategy.js';
import { builder } from 'xmlbuilder2';
import logger from '@adonisjs/core/services/logger';
/**
 * Settings that describe one XML serialization run.
 */
export interface XmlSerializationConfig {
    /** Dataset model that is serialized */
    model: Dataset;

    /** DOM representation of the dataset, when one is available */
    dom?: XMLBuilder;

    /** Names of the fields left out of the serialized output */
    excludeFields: string[];

    /** When `true`, empty fields are left out of the output */
    excludeEmpty: boolean;

    /** Base URI used for xlink:ref elements */
    baseUri: string;
}
/**
 * Optional switches a caller can pass to tune serialization.
 * Every property may be omitted.
 */
export interface SerializationOptions {
    /** Turn XML caching on */
    enableCaching?: boolean;

    /** Leave empty fields out of the generated XML */
    excludeEmptyFields?: boolean;

    /** Base URI to use instead of the default */
    baseUri?: string;

    /** Names of fields that must not be serialized */
    excludeFields?: Array<string>;
}
/**
 * DatasetXmlSerializer
 *
 * Serializes a Dataset model to XML and, when caching is enabled, stores the
 * generated XML in a DocumentXmlCache entry so later calls can reuse it.
 *
 * Configuration is fluent: `enableCaching()`, `excludeEmptyFields()` and the
 * `model` setter may be used after construction. The Strategy that builds the
 * XML is created from the current configuration at generation time, so those
 * later changes take effect.
 *
 * @example
 * ```typescript
 * const serializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();
 *
 * const xmlDocument = await serializer.toXmlDocument();
 * ```
 */
export default class DatasetXmlSerializer {
    private readonly config: XmlSerializationConfig;
    private cache: DocumentXmlCache | null = null;
    private cachingEnabled = false;

    /**
     * @param dataset - dataset model to serialize
     * @param options - optional initial configuration; the fluent methods
     *   can still adjust it afterwards
     */
    constructor(dataset: Dataset, options: SerializationOptions = {}) {
        this.config = {
            model: dataset,
            excludeEmpty: options.excludeEmptyFields ?? false,
            baseUri: options.baseUri ?? '',
            excludeFields: options.excludeFields ?? [],
        };
        this.cachingEnabled = options.enableCaching ?? false;
    }

    /**
     * Enable caching for XML generation
     * When enabled, generated XML is stored in database for faster retrieval
     */
    public enableCaching(): this {
        this.cachingEnabled = true;
        return this;
    }

    /**
     * Disable caching for XML generation
     */
    public disableCaching(): this {
        this.cachingEnabled = false;
        return this;
    }

    /**
     * Replace the dataset that will be serialized
     */
    set model(model: Dataset) {
        this.config.model = model;
    }

    /**
     * Configure to exclude empty fields from XML output
     */
    public excludeEmptyFields(): this {
        this.config.excludeEmpty = true;
        return this;
    }

    /**
     * Set the cache instance directly (useful when preloading)
     * @param cache - The DocumentXmlCache instance
     */
    public setCache(cache: DocumentXmlCache): this {
        this.cache = cache;
        return this;
    }

    /**
     * Get the current cache instance
     */
    public getCache(): DocumentXmlCache | null {
        return this.cache;
    }

    /**
     * Get the XML document for the dataset.
     *
     * Returns the cached document when a cache instance has been set and is
     * still valid; otherwise generates a fresh document, stores it when
     * caching is enabled, and returns its Rdr_Dataset node.
     *
     * @returns the XML document, or `null` when generation produced nothing
     */
    public async toXmlDocument(): Promise<XMLBuilder | null> {
        const dataset = this.config.model;

        // Try to get from cache first
        const cachedDocument = await this.retrieveFromCache();
        if (cachedDocument) {
            logger.debug(`Using cached XML for dataset ${dataset.id}`);
            return cachedDocument;
        }

        // Generate fresh document
        logger.debug(`[DatasetXmlSerializer] Cache miss - generating fresh XML for dataset ${dataset.id}`);
        const freshDocument = await this.createStrategy().createDomDocument();

        if (!freshDocument) {
            logger.error(`[DatasetXmlSerializer] Failed to generate XML for dataset ${dataset.id}`);
            return null;
        }

        // Cache if caching is enabled
        if (this.cachingEnabled) {
            await this.persistToCache(freshDocument, dataset);
        }

        // Extract the dataset-specific node
        return this.extractDatasetNode(freshDocument);
    }

    /**
     * Generate XML string representation
     * Convenience method that converts XMLBuilder to string
     */
    public async toXmlString(): Promise<string | null> {
        const document = await this.toXmlDocument();
        return document ? document.end({ prettyPrint: false }) : null;
    }

    /**
     * Build the Strategy from the configuration as it is right now.
     * Creating it here rather than in the constructor is what lets
     * `excludeEmptyFields()` and the `model` setter influence the output.
     */
    private createStrategy(): Strategy {
        return new Strategy({
            excludeEmpty: this.config.excludeEmpty,
            baseUri: this.config.baseUri,
            excludeFields: this.config.excludeFields,
            model: this.config.model,
        });
    }

    /**
     * Persist generated XML document to cache
     * Non-fatal - the save is awaited, but failures are only logged and
     * never interrupt the flow
     */
    private async persistToCache(domDocument: XMLBuilder, dataset: Dataset): Promise<void> {
        try {
            // Reuse the preloaded entry when there is one, otherwise create a new entry
            this.cache = this.cache || new DocumentXmlCache();
            this.cache.document_id = dataset.id;
            this.cache.xml_version = 1;
            this.cache.server_date_modified = dataset.server_date_modified.toFormat('yyyy-MM-dd HH:mm:ss');
            this.cache.xml_data = domDocument.end();

            await this.cache.save();
            logger.debug(`Cached XML for dataset ${dataset.id}`);
        } catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            logger.error(`Failed to cache XML for dataset ${dataset.id}: ${reason}`);
            // Don't throw - caching failure shouldn't break the flow
        }
    }

    /**
     * Extract the Rdr_Dataset node from full document
     * Falls back to the full document when no such node is found
     */
    private extractDatasetNode(domDocument: XMLBuilder): XMLBuilder {
        const node = domDocument.find((n) => n.node.nodeName === 'Rdr_Dataset', false, true)?.node;

        if (node) {
            return builder({ version: '1.0', encoding: 'UTF-8', standalone: true }, node);
        }

        return domDocument;
    }

    /**
     * Attempt to retrieve valid cached XML document
     * Returns null if no cache instance was set, the cache is stale, or
     * reading the cached document fails
     */
    private async retrieveFromCache(): Promise<XMLBuilder | null> {
        const dataset: Dataset = this.config.model;
        const cache = this.cache;
        if (!cache) {
            return null;
        }

        // Check if cache is still valid
        const actuallyCached = await DocumentXmlCache.hasValidEntry(dataset.id, dataset.server_date_modified);
        if (!actuallyCached) {
            logger.debug(`Cache invalid for dataset ${dataset.id}`);
            return null;
        }

        // Cache is current: return the cached document.
        // `await` keeps a rejected promise (should getDomDocument be
        // asynchronous) inside this try/catch.
        try {
            return await cache.getDomDocument();
        } catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            logger.error(`Failed to retrieve cached document for dataset ${dataset.id}: ${reason}`);
            return null;
        }
    }
}

View file

@ -2,7 +2,7 @@ import Dataset from '#models/dataset';
import { Client } from '@opensearch-project/opensearch';
import { create } from 'xmlbuilder2';
import SaxonJS from 'saxon-js';
import XmlModel from '#app/Library/XmlModel';
import DatasetXmlSerializer from '#app/Library/DatasetXmlSerializer';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import logger from '@adonisjs/core/services/logger';
import { readFileSync } from 'fs';
@ -72,31 +72,42 @@ export default {
}
},
/**
* Index a dataset document to OpenSearch/Elasticsearch
*/
async indexDocument(dataset: Dataset, index_name: string): Promise<void> {
try {
const proc = readFileSync('public/assets2/solr.sef.json');
const doc: string = await this.getTransformedString(dataset, proc);
// Load XSLT transformation file
const xsltProc = readFileSync('public/assets2/solr.sef.json');
let document = JSON.parse(doc);
// Transform dataset to JSON document
const jsonDoc: string = await this.getTransformedString(dataset, xsltProc);
const document = JSON.parse(jsonDoc);
// Index document to OpenSearch with doument json body
await this.client.index({
id: dataset.publish_id?.toString(),
index: index_name,
body: document,
refresh: true,
refresh: true, // make immediately searchable
});
logger.info(`dataset with publish_id ${dataset.publish_id} successfully indexed`);
logger.info(`Dataset ${dataset.publish_id} successfully indexed to ${index_name}`);
} catch (error) {
logger.error(`An error occurred while indexing datsaet with publish_id ${dataset.publish_id}.`);
logger.error(`Failed to index dataset ${dataset.publish_id}: ${error.message}`);
throw error; // Re-throw to allow caller to handle
}
},
/**
* Transform dataset XML to JSON using XSLT
*/
async getTransformedString(dataset: Dataset, proc: Buffer): Promise<string> {
let xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
const datasetNode = xml.root().ele('Dataset');
await createXmlRecord(dataset, datasetNode);
const xmlString = xml.end({ prettyPrint: false });
// Generate XML string from dataset
const xmlString = await this.generateDatasetXml(dataset);
try {
// Apply XSLT transformation
const result = await SaxonJS.transform({
stylesheetText: proc,
destination: 'serialized',
@ -108,6 +119,18 @@ export default {
return '';
}
},
/**
 * Serialize a dataset model into an XML string.
 *
 * The dataset record is written into a `Dataset` element below a `root`
 * element and the document is returned without pretty printing.
 */
async generateDatasetXml(dataset: Dataset): Promise<string> {
    const document = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');

    await createXmlRecord(dataset, document.root().ele('Dataset'));

    return document.end({ prettyPrint: false });
},
};
/**
* Return the default global focus trap stack
@ -115,74 +138,49 @@ export default {
* @return {import('focus-trap').FocusTrap[]}
*/
// export const indexDocument = async (dataset: Dataset, index_name: string, proc: Buffer): Promise<void> => {
// try {
// const doc = await getJsonString(dataset, proc);
// let document = JSON.parse(doc);
// await client.index({
// id: dataset.publish_id?.toString(),
// index: index_name,
// body: document,
// refresh: true,
// });
// Logger.info(`dataset with publish_id ${dataset.publish_id} successfully indexed`);
// } catch (error) {
// Logger.error(`An error occurred while indexing datsaet with publish_id ${dataset.publish_id}.`);
// }
// };
// const getJsonString = async (dataset, proc): Promise<string> => {
// let xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
// const datasetNode = xml.root().ele('Dataset');
// await createXmlRecord(dataset, datasetNode);
// const xmlString = xml.end({ prettyPrint: false });
// try {
// const result = await transform({
// stylesheetText: proc,
// destination: 'serialized',
// sourceText: xmlString,
// });
// return result.principalResult;
// } catch (error) {
// Logger.error(`An error occurred while creating the user, error: ${error.message},`);
// return '';
// }
// };
/**
* Create complete XML record for dataset
* Handles caching and metadata enrichment
*/
const createXmlRecord = async (dataset: Dataset, datasetNode: XMLBuilder): Promise<void> => {
const domNode = await getDatasetXmlDomNode(dataset);
if (domNode) {
// add frontdoor url and data-type
dataset.publish_id && addLandingPageAttribute(domNode, dataset.publish_id.toString());
addSpecInformation(domNode, 'data-type:' + dataset.type);
if (dataset.collections) {
for (const coll of dataset.collections) {
const collRole = coll.collectionRole;
addSpecInformation(domNode, collRole.oai_name + ':' + coll.number);
}
}
datasetNode.import(domNode);
if (!domNode) {
throw new Error(`Failed to generate XML DOM node for dataset ${dataset.id}`);
}
// Enrich with landing page URL
if (dataset.publish_id) {
addLandingPageAttribute(domNode, dataset.publish_id.toString());
}
// Add data type specification
addSpecInformation(domNode, `data-type:${dataset.type}`);
// Add collection information
if (dataset.collections) {
for (const coll of dataset.collections) {
const collRole = coll.collectionRole;
addSpecInformation(domNode, `${collRole.oai_name}:${coll.number}`);
}
}
datasetNode.import(domNode);
};
const getDatasetXmlDomNode = async (dataset: Dataset): Promise<XMLBuilder | null> => {
const xmlModel = new XmlModel(dataset);
const serializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();
// xmlModel.setModel(dataset);
xmlModel.excludeEmptyFields();
xmlModel.caching = true;
// const cache = dataset.xmlCache ? dataset.xmlCache : null;
// dataset.load('xmlCache');
// Load cache relationship if not already loaded
await dataset.load('xmlCache');
if (dataset.xmlCache) {
xmlModel.xmlCache = dataset.xmlCache;
serializer.setCache(dataset.xmlCache);
}
// return cache.getDomDocument();
const domDocument: XMLBuilder | null = await xmlModel.getDomDocument();
return domDocument;
// Generate or retrieve cached DOM document
const xmlDocument: XMLBuilder | null = await serializer.toXmlDocument();
return xmlDocument;
};
const addLandingPageAttribute = (domNode: XMLBuilder, dataid: string) => {
@ -192,6 +190,6 @@ const addLandingPageAttribute = (domNode: XMLBuilder, dataid: string) => {
domNode.att('landingpage', url);
};
const addSpecInformation= (domNode: XMLBuilder, information: string) => {
const addSpecInformation = (domNode: XMLBuilder, information: string) => {
domNode.ele('SetSpec').att('Value', information);
};
};

View file

@ -1,129 +0,0 @@
import DocumentXmlCache from '#models/DocumentXmlCache';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import Dataset from '#models/dataset';
import Strategy from './Strategy.js';
import { DateTime } from 'luxon';
import { builder } from 'xmlbuilder2';
/**
 * Configuration shared by the XML model and its serialization strategy.
 *
 * @interface Conf
 */
export interface Conf {
    /** Holds the current dataset model */
    model: Dataset;

    /** Holds the current DOM representation */
    dom?: XMLBuilder;

    /** List of fields to skip on serialization */
    excludeFields: string[];

    /** True, if empty fields get excluded from serialization */
    excludeEmpty: boolean;

    /** Base URI for xlink:ref elements */
    baseUri: string;
}
/**
 * Produces the XML DOM document for a dataset, optionally backed by a
 * `DocumentXmlCache` row.
 *
 * Typical use: construct with a dataset, optionally assign `xmlCache` and
 * switch `caching` on, then call `getDomDocument()`.
 */
export default class XmlModel {
    private config: Conf;
    private cache: DocumentXmlCache | null = null;
    private _caching = false;
    private strategy: Strategy;

    /**
     * @param dataset - Dataset to serialize.
     */
    constructor(dataset: Dataset) {
        this.config = {
            excludeEmpty: false,
            baseUri: '',
            excludeFields: [],
            model: dataset,
        };
        // NOTE(review): the strategy receives its own config literal (with
        // excludeEmpty already true), so later changes made through `model`
        // or `excludeEmptyFields()` only touch `this.config`, not the
        // strategy's copy. Confirm whether that is intended.
        this.strategy = new Strategy({
            excludeEmpty: true,
            baseUri: '',
            excludeFields: [],
            model: dataset,
        });
    }

    /** Replace the dataset stored in this model's configuration. */
    set model(model: Dataset) {
        this.config.model = model;
    }

    /** Mark empty fields as excluded in this model's configuration. */
    public excludeEmptyFields(): void {
        this.config.excludeEmpty = true;
    }

    /** Cache row used for lookups and writes, or `null` if none was assigned. */
    get xmlCache(): DocumentXmlCache | null {
        return this.cache;
    }

    set xmlCache(cache: DocumentXmlCache) {
        this.cache = cache;
    }

    /** Whether a freshly generated document is persisted to the cache. */
    get caching(): boolean {
        return this._caching;
    }

    set caching(caching: boolean) {
        this._caching = caching;
    }

    /**
     * Return the dataset's XML document, preferring a valid cache entry and
     * otherwise generating it through the strategy.
     *
     * When a document is generated and caching is enabled, the full XML is
     * saved to the cache row (created on demand). The generated document is
     * then reduced to its `Rdr_Dataset` node when such a node is found.
     *
     * @returns The XML document, or `null` if none could be produced.
     */
    public async getDomDocument(): Promise<XMLBuilder | null> {
        const dataset = this.config.model;

        let domDocument: XMLBuilder | null = await this.getDomDocumentFromXmlCache();
        if (domDocument != null) {
            return domDocument;
        }

        domDocument = await this.strategy.createDomDocument();

        if (this._caching) {
            // Caching is desired: persist the complete serialized document.
            this.cache = this.cache ?? new DocumentXmlCache();
            this.cache.document_id = dataset.id;
            this.cache.xml_version = 1;
            this.cache.server_date_modified = dataset.server_date_modified.toFormat('yyyy-MM-dd HH:mm:ss');
            this.cache.xml_data = domDocument.end();
            await this.cache.save();
        }

        // Callers get a document rooted at the Rdr_Dataset element, if present.
        const datasetNode = domDocument.find((n) => n.node.nodeName === 'Rdr_Dataset', false, true)?.node;
        if (datasetNode != null) {
            domDocument = builder({ version: '1.0', encoding: 'UTF-8', standalone: true }, datasetNode);
        }

        return domDocument;
    }

    /**
     * Try to obtain the document from the assigned cache row.
     *
     * @returns The cached document, or `null` when no cache row is assigned,
     * the cache has no valid entry for the dataset, or reading it fails.
     */
    private async getDomDocumentFromXmlCache(): Promise<XMLBuilder | null> {
        const cache = this.cache;
        if (!cache) {
            return null;
        }

        const dataset: Dataset = this.config.model;
        const actuallyCached: boolean = await DocumentXmlCache.hasValidEntry(dataset.id, dataset.server_date_modified);
        if (!actuallyCached) {
            return null;
        }

        try {
            // `await` is required here: without it a rejected promise would
            // escape this try block and the fallback below would never run.
            return await cache.getDomDocument();
        } catch {
            // Best effort: an unreadable cache entry means "regenerate".
            return null;
        }
    }
}

View file

@ -4,7 +4,8 @@ import { builder, create } from 'xmlbuilder2';
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
import db from '@adonisjs/lucid/services/db';
import { DateTime } from 'luxon';
import type { BelongsTo } from "@adonisjs/lucid/types/relations";
import type { BelongsTo } from '@adonisjs/lucid/types/relations';
import logger from '@adonisjs/core/services/logger';
export default class DocumentXmlCache extends BaseModel {
public static namingStrategy = new SnakeCaseNamingStrategy();
@ -66,33 +67,38 @@ export default class DocumentXmlCache extends BaseModel {
}
/**
 * Check if a valid (non-stale) cache entry exists for a dataset.
 *
 * A row counts as valid only when its `server_date_modified` is strictly
 * later than the dataset's last modification timestamp.
 * NOTE(review): a cache row stamped with exactly the dataset's timestamp is
 * treated as stale by this comparison; confirm that cache writers stamp rows
 * later than the dataset's modification time.
 *
 * @param datasetId - The dataset ID to check
 * @param datasetServerDateModified - The dataset's last modification timestamp
 * @returns true if valid cache exists, false otherwise
 */
public static async hasValidEntry(datasetId: number, datasetServerDateModified: DateTime): Promise<boolean> {
    // Format the timestamp as 'yyyy-MM-dd HH:mm:ss' for the column comparison.
    const serverDateModifiedString: string = datasetServerDateModified.toFormat('yyyy-MM-dd HH:mm:ss');

    const row = await db
        .from(this.table)
        .where('document_id', datasetId)
        .where('server_date_modified', '>', serverDateModifiedString) // cache must be strictly newer
        .first();

    const isValid = !!row;
    if (isValid) {
        logger.debug(`Valid cache found for dataset ${datasetId}`);
    } else {
        logger.debug(`No valid cache for dataset ${datasetId} (dataset modified: ${serverDateModifiedString})`);
    }
    return isValid;
}
/**
 * Invalidate this cache entry by deleting it, then record the removal in
 * the debug log.
 */
public async invalidate(): Promise<void> {
    await this.delete();

    const message = `Invalidated cache for document ${this.document_id}`;
    logger.debug(message);
}
}

View file

@ -55,8 +55,8 @@ export const createDatasetValidator = vine.compile(
.translatedLanguage({ mainLanguageField: 'language', typeField: 'type' }),
}),
)
// .minLength(1),
.arrayContainsTypes({ typeA: 'abstract', typeB: 'translated' }),
// .minLength(1),
.arrayContainsTypes({ typeA: 'abstract', typeB: 'translated' }),
authors: vine
.array(
vine.object({
@ -160,7 +160,8 @@ export const createDatasetValidator = vine.compile(
.fileScan({ removeInfected: true }),
)
.minLength(1),
}),);
}),
);
/**
* Validates the dataset's update action
@ -309,11 +310,13 @@ export const updateDatasetValidator = vine.compile(
.fileScan({ removeInfected: true }),
)
.dependentArrayMinLength({ dependentArray: 'fileInputs', min: 1 }),
fileInputs: vine.array(
vine.object({
label: vine.string().trim().maxLength(100),
}),
).optional(),
fileInputs: vine
.array(
vine.object({
label: vine.string().trim().maxLength(100),
}),
)
.optional(),
}),
);
@ -501,7 +504,7 @@ let messagesProvider = new SimpleMessagesProvider({
'files.array.minLength': 'At least {{ min }} file upload is required.',
'files.*.size': 'file size is to big',
'files.*.extnames': 'file extension is not supported',
'embargo_date.date.afterOrEqual': `Embargo date must be on or after ${dayjs().add(10, 'day').format('DD.MM.YYYY')}`,
'embargo_date.date.afterOrEqual': `Embargo date must be on or after ${dayjs().add(10, 'day').format('DD.MM.YYYY')}`,
});
createDatasetValidator.messagesProvider = messagesProvider;

View file

@ -8,20 +8,20 @@ export const createRoleValidator = vine.compile(
vine.object({
name: vine
.string()
.isUnique({ table: 'roles', column: 'name' })
.trim()
.minLength(3)
.maxLength(255)
.regex(/^[a-zA-Z0-9]+$/), //Must be alphanumeric with hyphens or underscores
.isUnique({ table: 'roles', column: 'name' })
.regex(/^[a-zA-Z0-9]+$/), // Must be alphanumeric
display_name: vine
.string()
.isUnique({ table: 'roles', column: 'display_name' })
.trim()
.minLength(3)
.maxLength(255)
.isUnique({ table: 'roles', column: 'display_name' })
.regex(/^[a-zA-Z0-9]+$/),
description: vine.string().trim().escape().minLength(3).maxLength(255).optional(),
permissions: vine.array(vine.number()).minLength(1), // define at least one permission for the new role
permissions: vine.array(vine.number()).minLength(1), // At least one permission required
}),
);
@ -29,21 +29,28 @@ export const updateRoleValidator = vine.withMetaData<{ roleId: number }>().compi
vine.object({
name: vine
.string()
// .unique(async (db, value, field) => {
// const result = await db.from('roles').select('id').whereNot('id', field.meta.roleId).where('name', value).first();
// return result.length ? false : true;
// })
.trim()
.minLength(3)
.maxLength(255)
.isUnique({
table: 'roles',
column: 'name',
whereNot: (field) => field.meta.roleId,
})
.regex(/^[a-zA-Z0-9]+$/),
display_name: vine
.string()
.trim()
.minLength(3)
.maxLength(255),
.maxLength(255)
.isUnique({
table: 'roles',
column: 'display_name',
whereNot: (field) => field.meta.roleId,
})
.regex(/^[a-zA-Z0-9]+$/),
description: vine.string().trim().escape().minLength(3).maxLength(255).optional(),
permissions: vine.array(vine.number()).minLength(1), // define at least one permission for the new role
permissions: vine.array(vine.number()).minLength(1), // At least one permission required
}),
);