feat: Enhance background job settings UI and functionality

- Updated BackgroundJob.vue to improve the display of background job statuses, including missing cross-references and the current job mode.
- Added auto-refresh functionality for the background job status.
- Introduced success toast notifications for successful status refreshes.
- Modified the XML serialization process in DatasetXmlSerializer for better caching and performance.
- Implemented a new RuleProvider for managing custom validation rules.
- Improved error handling in routes for loading background job settings.
- Enhanced the ClamScan configuration with socket support for virus scanning.
- Refactored the dayjs utility to streamline locale management.
2025-10-14 12:19:09 +02:00 · 2025-10-14 12:19:09 +02:00 · b5bbe26ec2
commit b5bbe26ec2
parent 6757bdb77c
27 changed files with 1221 additions and 603 deletions
--- a/app/Controllers/Http/Api/DatasetController.ts
+++ b/app/Controllers/Http/Api/DatasetController.ts
@ -1,6 +1,7 @@
 import type { HttpContext } from '@adonisjs/core/http';
 import Dataset from '#models/dataset';
 import { StatusCodes } from 'http-status-codes';
+import DatasetReference from '#models/dataset_reference';

 // node ace make:controller Author
 export default class DatasetController {
@ -81,11 +82,11 @@ export default class DatasetController {
                .preload('licenses')
                .preload('references')
                .preload('project')
-                .preload('referenced_by', (builder) => {
-                    builder.preload('dataset', (builder) => {
-                        builder.preload('identifier');
-                    });
-                })
+                // .preload('referenced_by', (builder) => {
+                //     builder.preload('dataset', (builder) => {
+                //         builder.preload('identifier');
+                //     });
+                // })
                .preload('files', (builder) => {
                    builder.preload('hashvalues');
                })
@ -98,7 +99,17 @@ export default class DatasetController {
                });
            }

-            return response.status(StatusCodes.OK).json(dataset);
+            // Build the version chain
+            const versionChain = await this.buildVersionChain(dataset);
+
+            // Add version chain to response
+            const responseData = {
+                ...dataset.toJSON(),
+                versionChain: versionChain,
+            };
+
+            // return response.status(StatusCodes.OK).json(dataset);
+            return response.status(StatusCodes.OK).json(responseData);
        } catch (error) {
            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: error.message || `Error retrieving Dataset with publish_id=${params.publish_id}.`,
@ -159,11 +170,11 @@ export default class DatasetController {
                .preload('licenses')
                .preload('references')
                .preload('project')
-                .preload('referenced_by', (builder) => {
-                    builder.preload('dataset', (builder) => {
-                        builder.preload('identifier');
-                    });
-                })
+                // .preload('referenced_by', (builder) => {
+                //     builder.preload('dataset', (builder) => {
+                //         builder.preload('identifier');
+                //     });
+                // })
                .preload('files', (builder) => {
                    builder.preload('hashvalues');
                })
@ -175,12 +186,139 @@ export default class DatasetController {
                    message: `Cannot find Dataset with identifier=${identifierValue}.`,
                });
            }
+            // Build the version chain
+            const versionChain = await this.buildVersionChain(dataset);

-            return response.status(StatusCodes.OK).json(dataset);
+            // Add version chain to response
+            const responseData = {
+                ...dataset.toJSON(),
+                versionChain: versionChain,
+            };
+
+            // return response.status(StatusCodes.OK).json(dataset);
+            return response.status(StatusCodes.OK).json(responseData);
        } catch (error) {
            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: error.message || `Error retrieving Dataset with identifier=${identifierValue}.`,
            });
        }
    }
+
+    /**
+     * Assemble the version chain that is attached to a dataset API response.
+     *
+     * The result has three parts: a summary of the given dataset (`current`),
+     * the list returned by getPreviousVersions() and the list returned by
+     * getNewerVersions(). The two lookups run one after the other.
+     *
+     * @param dataset - the dataset the chain is built around
+     * @returns an object with the keys `current`, `previousVersions` and `newerVersions`
+     */
+    private async buildVersionChain(dataset: Dataset) {
+        // Summary of the dataset itself; doi and main_title fall back to null when not available
+        const current = {
+            id: dataset.id,
+            publish_id: dataset.publish_id,
+            doi: dataset.identifier?.value || null,
+            main_title: dataset.mainTitle || null,
+            server_date_published: dataset.server_date_published,
+        };
+
+        // Walk backwards in time first ...
+        const previousVersions: any[] = await this.getPreviousVersions(dataset.id);
+
+        // ... then forwards
+        const newerVersions: any[] = await this.getNewerVersions(dataset.id);
+
+        return { current, previousVersions, newerVersions };
+    }
+
+    /**
+     * Collect every older version of a dataset by following 'IsNewVersionOf' references.
+     *
+     * For each DatasetReference row with document_id = datasetId and relation
+     * 'IsNewVersionOf', the referenced dataset is loaded (with identifier and titles),
+     * appended to the result and then traversed recursively. The list is therefore
+     * ordered depth-first, starting with the direct predecessor.
+     *
+     * @param datasetId - id of the dataset whose predecessors are looked up
+     * @param visited - ids already traversed in this walk; shared across the recursion
+     * @returns summaries of all reachable older versions, each listed at most once
+     */
+    private async getPreviousVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
+        // Prevent infinite loops
+        if (visited.has(datasetId)) {
+            return [];
+        }
+        visited.add(datasetId);
+
+        const previousVersions: any[] = [];
+
+        // Find references where this dataset "IsNewVersionOf" another dataset
+        const previousRefs = await DatasetReference.query()
+            .where('document_id', datasetId)
+            .where('relation', 'IsNewVersionOf')
+            .whereNotNull('related_document_id');
+
+        for (const ref of previousRefs) {
+            if (!ref.related_document_id) continue;
+
+            const previousDataset = await Dataset.query()
+                .where('id', ref.related_document_id)
+                .preload('identifier')
+                .preload('titles')
+                .first();
+
+            // Skip missing datasets and datasets that were already traversed. Without the
+            // visited check a reference cycle would list the starting dataset as its own
+            // previous version, and a dataset reachable via two paths would appear twice.
+            if (!previousDataset || visited.has(previousDataset.id)) continue;
+
+            previousVersions.push({
+                id: previousDataset.id,
+                publish_id: previousDataset.publish_id,
+                doi: previousDataset.identifier?.value || null,
+                main_title: previousDataset.mainTitle || null,
+                server_date_published: previousDataset.server_date_published,
+                // The listed dataset is a previous version of the dataset it was reached from
+                relation: 'IsPreviousVersionOf',
+            });
+
+            // Recursively get even older versions (this also marks previousDataset as visited)
+            const olderVersions = await this.getPreviousVersions(previousDataset.id, visited);
+            previousVersions.push(...olderVersions);
+        }
+
+        return previousVersions;
+    }
+
+    /**
+     * Collect every newer version of a dataset by following 'IsPreviousVersionOf' references.
+     *
+     * For each DatasetReference row with document_id = datasetId and relation
+     * 'IsPreviousVersionOf', the referenced dataset is loaded (with identifier and titles),
+     * appended to the result and then traversed recursively. The list is therefore
+     * ordered depth-first, starting with the direct successor.
+     *
+     * @param datasetId - id of the dataset whose successors are looked up
+     * @param visited - ids already traversed in this walk; shared across the recursion
+     * @returns summaries of all reachable newer versions, each listed at most once
+     */
+    private async getNewerVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
+        // Prevent infinite loops
+        if (visited.has(datasetId)) {
+            return [];
+        }
+        visited.add(datasetId);
+
+        const newerVersions: any[] = [];
+
+        // Find references where this dataset "IsPreviousVersionOf" another dataset
+        const newerRefs = await DatasetReference.query()
+            .where('document_id', datasetId)
+            .where('relation', 'IsPreviousVersionOf')
+            .whereNotNull('related_document_id');
+
+        for (const ref of newerRefs) {
+            if (!ref.related_document_id) continue;
+
+            const newerDataset = await Dataset.query()
+                .where('id', ref.related_document_id)
+                .preload('identifier')
+                .preload('titles')
+                .first();
+
+            // Skip missing datasets and datasets that were already traversed. Without the
+            // visited check a reference cycle would list the starting dataset as its own
+            // newer version, and a dataset reachable via two paths would appear twice.
+            if (!newerDataset || visited.has(newerDataset.id)) continue;
+
+            newerVersions.push({
+                id: newerDataset.id,
+                publish_id: newerDataset.publish_id,
+                doi: newerDataset.identifier?.value || null,
+                main_title: newerDataset.mainTitle || null,
+                server_date_published: newerDataset.server_date_published,
+                // The listed dataset is a new version of the dataset it was reached from
+                relation: 'IsNewVersionOf',
+            });
+
+            // Recursively get even newer versions (this also marks newerDataset as visited)
+            const evenNewerVersions = await this.getNewerVersions(newerDataset.id, visited);
+            newerVersions.push(...evenNewerVersions);
+        }
+
+        return newerVersions;
+    }
 }
--- a/app/Controllers/Http/Api/FileController.ts
+++ b/app/Controllers/Http/Api/FileController.ts
@ -25,10 +25,10 @@ export default class FileController {
        const dataset = file.dataset;
        // Files from unpublished datasets are now blocked
        if (dataset.server_state !== 'published') {
-        return response.status(StatusCodes.FORBIDDEN).send({
-            message: `File access denied: Dataset is not published.`,
-        });
-    }
+            return response.status(StatusCodes.FORBIDDEN).send({
+                message: `File access denied: Dataset is not published.`,
+            });
+        }
        if (dataset && this.isUnderEmbargo(dataset.embargo_date)) {
            return response.status(StatusCodes.FORBIDDEN).send({
                message: `File is under embargo until ${dataset.embargo_date?.toFormat('yyyy-MM-dd')}`,
@ -36,12 +36,26 @@ export default class FileController {
        }

        // Proceed with file download
-        const filePath = '/storage/app/data/' + file.pathName;      
+        const filePath = '/storage/app/data/' + file.pathName;
        const fileExt = file.filePath.split('.').pop() || '';
-        // const fileName = file.label + fileExt;       
-        const fileName = file.label.toLowerCase().endsWith(`.${fileExt.toLowerCase()}`) 
-            ? file.label 
-            : `${file.label}.${fileExt}`;
+        // const fileName = file.label + fileExt;
+        const fileName = file.label.toLowerCase().endsWith(`.${fileExt.toLowerCase()}`) ? file.label : `${file.label}.${fileExt}`;
+
+        /**
+         * Decide whether a file of the given MIME type is served with
+         * `Content-Disposition: inline` (browser preview) instead of `attachment`.
+         *
+         * Parameters such as `; charset=utf-8` and surrounding whitespace are ignored,
+         * and the comparison is case-insensitive.
+         *
+         * NOTE(review): `text/*`, `image/*` and `application/xml` include types that can
+         * carry script (text/html, image/svg+xml, XML with XHTML content). Confirm that
+         * rendering such uploads inline on this origin is intended.
+         *
+         * @param mimeType - MIME type stored for the file
+         * @returns true when the file should be previewed inline
+         */
+        const canPreviewInline = (mimeType: string): boolean => {
+            // Keep only the "type/subtype" part so exact comparisons are not defeated by parameters
+            const [essence = ''] = mimeType.split(';');
+            const type = essence.trim().toLowerCase();
+            return (
+                type === 'application/pdf' ||
+                type.startsWith('image/') ||
+                type.startsWith('text/') ||
+                type === 'application/json' ||
+                type === 'application/xml' ||
+                // Video and audio are previewed inline as well; remove these two lines to force a download
+                type.startsWith('video/') ||
+                type.startsWith('audio/')
+            );
+        };
+        const disposition = canPreviewInline(file.mimeType) ? 'inline' : 'attachment';

        try {
            fs.accessSync(filePath, fs.constants.R_OK); //| fs.constants.W_OK);
@ -51,7 +65,7 @@ export default class FileController {
                .header('Cache-Control', 'no-cache private')
                .header('Content-Description', 'File Transfer')
                .header('Content-Type', file.mimeType)
-                .header('Content-Disposition', 'inline; filename=' + fileName)
+                .header('Content-Disposition', `${disposition}; filename="${fileName}"`)
                .header('Content-Transfer-Encoding', 'binary')
                .header('Access-Control-Allow-Origin', '*')
                .header('Access-Control-Allow-Methods', 'GET');
--- a/app/Controllers/Http/Editor/DatasetController.ts
+++ b/app/Controllers/Http/Editor/DatasetController.ts
@ -3,7 +3,7 @@ import { Client } from '@opensearch-project/opensearch';
 import User from '#models/user';
 import Dataset from '#models/dataset';
 import DatasetIdentifier from '#models/dataset_identifier';
-import XmlModel from '#app/Library/XmlModel';
+import DatasetXmlSerializer from '#app/Library/DatasetXmlSerializer';
 import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
 import { create } from 'xmlbuilder2';
 import { readFileSync } from 'fs';
@ -574,55 +574,88 @@ export default class DatasetsController {

    public async doiStore({ request, response }: HttpContext) {
        const dataId = request.param('publish_id');
-        const dataset = await Dataset.query()
-            // .preload('xmlCache')
-            .where('publish_id', dataId)
-            .firstOrFail();
+
+        // Load dataset with minimal required relationships
+        const dataset = await Dataset.query().where('publish_id', dataId).firstOrFail();
+
+        const prefix = process.env.DATACITE_PREFIX || '';
+        const base_domain = process.env.BASE_DOMAIN || '';
+
+        // Generate DOI metadata XML
        const xmlMeta = (await Index.getDoiRegisterString(dataset)) as string;

-        let prefix = '';
-        let base_domain = '';
-        // const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
-        prefix = process.env.DATACITE_PREFIX || '';
-        base_domain = process.env.BASE_DOMAIN || '';
+        // Prepare DOI registration data
+        const doiValue = `${prefix}/tethys.${dataset.publish_id}`; //'10.21388/tethys.213'
+        const landingPageUrl = `https://doi.${getDomain(base_domain)}/${prefix}/tethys.${dataset.publish_id}`; //https://doi.dev.tethys.at/10.21388/tethys.213

-        // register DOI:
-        const doiValue = prefix + '/tethys.' + dataset.publish_id; //'10.21388/tethys.213'
-        const landingPageUrl = 'https://doi.' + getDomain(base_domain) + '/' + prefix + '/tethys.' + dataset.publish_id; //https://doi.dev.tethys.at/10.21388/tethys.213
+        // Register DOI with DataCite
        const doiClient = new DoiClient();
        const dataciteResponse = await doiClient.registerDoi(doiValue, xmlMeta, landingPageUrl);

-        if (dataciteResponse?.status === 201) {
-            // if response OK 201; save the Identifier value into db
-            const doiIdentifier = new DatasetIdentifier();
-            doiIdentifier.value = doiValue;
-            doiIdentifier.dataset_id = dataset.id;
-            doiIdentifier.type = 'doi';
-            doiIdentifier.status = 'findable';
-
-            // save updated dataset to db an index to OpenSearch
-            try {
-                // save modified date of datset for re-caching model in db an update the search index
-                dataset.server_date_modified = DateTime.now();
-                // autoUpdate: true only triggers when dataset.save() is called, not when saving a related model like below
-                await dataset.save();
-                await dataset.related('identifier').save(doiIdentifier);
-                const index_name = 'tethys-records';
-                await Index.indexDocument(dataset, index_name);
-            } catch (error) {
-                logger.error(`${__filename}: Indexing document ${dataset.id} failed: ${error.message}`);
-                // Log the error or handle it as needed
-                throw new HttpException(error.message);
-            }
-            return response.toRoute('editor.dataset.list').flash('message', 'You have successfully created a DOI for the dataset!');
-        } else {
+        if (dataciteResponse?.status !== 201) {
            const message = `Unexpected DataCite MDS response code ${dataciteResponse?.status}`;
-            // Log the error or handle it as needed
            throw new DoiClientException(dataciteResponse?.status, message);
        }
+
+        // DOI registration successful - persist and index
+        try {
+            // Save identifier
+            await this.persistDoiAndIndex(dataset, doiValue);
+
+            return response.toRoute('editor.dataset.list').flash('message', 'You have successfully created a DOI for the dataset!');
+        } catch (error) {
+            logger.error(`${__filename}: Failed to persist DOI and index dataset ${dataset.id}: ${error.message}`);
+            throw new HttpException(error.message);
+        }
+
        // return response.toRoute('editor.dataset.list').flash('message', xmlMeta);
    }

+    /**
+     * Store a freshly registered DOI for a dataset and re-index the dataset.
+     *
+     * Steps, in order: save a 'doi' identifier with status 'findable' through the
+     * dataset's `identifier` relation, bump `server_date_modified` and save the
+     * dataset, drop the existing XML cache, reload the dataset with its identifier
+     * and xmlCache relations, and index the reloaded record.
+     *
+     * @param dataset - the dataset the DOI was registered for
+     * @param doiValue - the registered DOI value
+     */
+    private async persistDoiAndIndex(dataset: Dataset, doiValue: string): Promise<void> {
+        // Build the identifier record for the new DOI
+        const identifier = new DatasetIdentifier();
+        Object.assign(identifier, {
+            value: doiValue,
+            dataset_id: dataset.id,
+            type: 'doi',
+            status: 'findable',
+        });
+
+        // Persist the identifier via the relation
+        await dataset.related('identifier').save(identifier);
+
+        // Touch the modification timestamp so the change is reflected on the dataset
+        dataset.server_date_modified = DateTime.now();
+        await dataset.save();
+
+        // The cached XML predates the DOI, so remove it
+        await this.invalidateDatasetCache(dataset);
+
+        // Re-read the dataset so indexing sees the stored identifier
+        const reloaded = await Dataset.query()
+            .where('id', dataset.id)
+            .preload('identifier')
+            .preload('xmlCache')
+            .firstOrFail();
+
+        // Push the reloaded record to the search index
+        const targetIndex = process.env.OPENSEARCH_INDEX || 'tethys-records';
+        await Index.indexDocument(reloaded, targetIndex);
+
+        logger.info(`Successfully created DOI ${doiValue} and indexed dataset ${dataset.id}`);
+    }
+
+    /**
+     * Delete the XML cache entry of a dataset, if one exists.
+     *
+     * Loads the `xmlCache` relation on the given dataset and deletes the related
+     * row when present; does nothing otherwise.
+     *
+     * @param dataset - the dataset whose cached XML should be dropped
+     */
+    private async invalidateDatasetCache(dataset: Dataset): Promise<void> {
+        await dataset.load('xmlCache');
+
+        const staleCache = dataset.xmlCache;
+        if (!staleCache) {
+            return;
+        }
+
+        await staleCache.delete();
+        logger.debug(`Invalidated XML cache for dataset ${dataset.id}`);
+    }
+
    public async show({}: HttpContext) {}

    public async edit({ request, inertia, response }: HttpContext) {
@ -1124,14 +1157,14 @@ export default class DatasetsController {

        // Set the response headers and download the file
        response
-                .header('Cache-Control', 'no-cache private')
-                .header('Content-Description', 'File Transfer')
-                .header('Content-Type', file.mime_type || 'application/octet-stream')
-                //  .header('Content-Disposition', 'inline; filename=' + fileName)
-                .header('Content-Transfer-Encoding', 'binary')
-                .header('Access-Control-Allow-Origin', '*')
-                .header('Access-Control-Allow-Methods', 'GET');
-            response.attachment(fileName);
+            .header('Cache-Control', 'no-cache private')
+            .header('Content-Description', 'File Transfer')
+            .header('Content-Type', file.mime_type || 'application/octet-stream')
+            //  .header('Content-Disposition', 'inline; filename=' + fileName)
+            .header('Content-Transfer-Encoding', 'binary')
+            .header('Access-Control-Allow-Origin', '*')
+            .header('Access-Control-Allow-Methods', 'GET');
+        response.attachment(fileName);
        return response.download(filePath);
    }

@ -1144,19 +1177,18 @@ export default class DatasetsController {
        }
    }

-    private async getDatasetXmlDomNode(dataset: Dataset) {
-        const xmlModel = new XmlModel(dataset);
+    private async getDatasetXmlDomNode(dataset: Dataset): Promise<XMLBuilder | null> {
+        const serializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();
        // xmlModel.setModel(dataset);
-        xmlModel.excludeEmptyFields();
-        xmlModel.caching = true;
-        // const cache = dataset.xmlCache ? dataset.xmlCache : null;
-        // dataset.load('xmlCache');
+
+        // Load existing cache if available
+        await dataset.load('xmlCache');
        if (dataset.xmlCache) {
-            xmlModel.xmlCache = dataset.xmlCache;
+            serializer.setCache(dataset.xmlCache);
        }

        // return cache.getDomDocument();
-        const domDocument: XMLBuilder | null = await xmlModel.getDomDocument();
-        return domDocument;
+        const xmlDocument : XMLBuilder | null = await serializer.toXmlDocument();
+        return xmlDocument;
    }
 }
--- a/app/Controllers/Http/Oai/OaiController.ts
+++ b/app/Controllers/Http/Oai/OaiController.ts
@ -15,7 +15,7 @@ import { OaiModelException, BadOaiModelException } from '#app/exceptions/OaiMode
 import Dataset from '#models/dataset';
 import Collection from '#models/collection';
 import { getDomain, preg_match } from '#app/utils/utility-functions';
-import XmlModel from '#app/Library/XmlModel';
+import DatasetXmlSerializer from '#app/Library/DatasetXmlSerializer';
 import logger from '@adonisjs/core/services/logger';
 import ResumptionToken from '#app/Library/Oai/ResumptionToken';
 // import Config from '@ioc:Adonis/Core/Config';
@ -292,7 +292,7 @@ export default class OaiController {
        this.xsltParameter['repIdentifier'] = repIdentifier;
        const datasetNode = this.xml.root().ele('Datasets');

-        const paginationParams: PagingParameter ={
+        const paginationParams: PagingParameter = {
            cursor: 0,
            totalLength: 0,
            start: maxRecords + 1,
@ -333,7 +333,7 @@ export default class OaiController {
    }

    private async handleNoResumptionToken(oaiRequest: Dictionary, paginationParams: PagingParameter, maxRecords: number) {
-        this.validateMetadataPrefix(oaiRequest, paginationParams);       
+        this.validateMetadataPrefix(oaiRequest, paginationParams);
        const finder: ModelQueryBuilderContract<typeof Dataset, Dataset> = Dataset.query().whereIn(
            'server_state',
            this.deliveringDocumentStates,
@ -347,16 +347,20 @@ export default class OaiController {
        finder: ModelQueryBuilderContract<typeof Dataset, Dataset>,
        paginationParams: PagingParameter,
        oaiRequest: Dictionary,
-        maxRecords: number
+        maxRecords: number,
    ) {
        const totalResult = await finder
            .clone()
            .count('* as total')
            .first()
            .then((res) => res?.$extras.total);
-            paginationParams.totalLength = Number(totalResult);
+        paginationParams.totalLength = Number(totalResult);

-        const combinedRecords: Dataset[] = await finder.select('publish_id').orderBy('publish_id').offset(0).limit(maxRecords*2);
+        const combinedRecords: Dataset[] = await finder
+            .select('publish_id')
+            .orderBy('publish_id')
+            .offset(0)
+            .limit(maxRecords * 2);

        paginationParams.activeWorkIds = combinedRecords.slice(0, 100).map((dat) => Number(dat.publish_id));
        paginationParams.nextDocIds = combinedRecords.slice(100).map((dat) => Number(dat.publish_id));
@ -602,19 +606,17 @@ export default class OaiController {
    }

    private async getDatasetXmlDomNode(dataset: Dataset) {
-        const xmlModel = new XmlModel(dataset);
-        // xmlModel.setModel(dataset);
-        xmlModel.excludeEmptyFields();
-        xmlModel.caching = true;
+        const serializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();
+
        // const cache = dataset.xmlCache ? dataset.xmlCache : null;
        // dataset.load('xmlCache');
        if (dataset.xmlCache) {
-            xmlModel.xmlCache = dataset.xmlCache;
+            serializer.setCache(dataset.xmlCache);
        }

-        // return cache.getDomDocument();
-        const domDocument: XMLBuilder | null = await xmlModel.getDomDocument();
-        return domDocument;
+        // return cache.toXmlDocument();
+        const xmlDocument: XMLBuilder | null = await serializer.toXmlDocument();
+        return xmlDocument;
    }

    private addSpecInformation(domNode: XMLBuilder, information: string) {
--- a/app/Library/DatasetXmlSerializer.ts
+++ b/app/Library/DatasetXmlSerializer.ts
@ -0,0 +1,231 @@
+import DocumentXmlCache from '#models/DocumentXmlCache';
+import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
+import Dataset from '#models/dataset';
+import Strategy from './Strategy.js';
+import { builder } from 'xmlbuilder2';
+import logger from '@adonisjs/core/services/logger';
+
+/**
+ * Settings that drive the XML serialization of a single dataset.
+ *
+ * @interface XmlSerializationConfig
+ */
+export interface XmlSerializationConfig {
+    /** Dataset model that is serialized */
+    model: Dataset;
+    /** Optional DOM representation, when one is already available */
+    dom?: XMLBuilder;
+    /** Names of fields that are left out of the output */
+    excludeFields: string[];
+    /** When true, empty fields are left out of the output */
+    excludeEmpty: boolean;
+    /** Base URI used for xlink:ref elements */
+    baseUri: string;
+}
+
+/**
+ * Optional switches accepted by the DatasetXmlSerializer constructor.
+ * Every member may be omitted.
+ */
+export interface SerializationOptions {
+    /** Turn XML caching on */
+    enableCaching?: boolean;
+    /** Leave empty fields out of the output */
+    excludeEmptyFields?: boolean;
+    /** Base URI to use instead of the default */
+    baseUri?: string;
+    /** Names of fields to leave out */
+    excludeFields?: Array<string>;
+}
+
+/**
+ * DatasetXmlSerializer
+ *
+ * Serializes a Dataset model to XML and optionally stores the result in a
+ * DocumentXmlCache record so later calls can reuse it.
+ *
+ * The configuration methods return `this`, so they can be chained. Settings
+ * changed after construction are honoured because the Strategy is created from
+ * the current configuration at serialization time.
+ *
+ * @example
+ * ```typescript
+ * const serializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();
+ *
+ * const xmlDocument = await serializer.toXmlDocument();
+ * ```
+ */
+export default class DatasetXmlSerializer {
+    private readonly config: XmlSerializationConfig;
+    private cache: DocumentXmlCache | null = null;
+    private cachingEnabled = false;
+
+    /**
+     * @param dataset - the dataset to serialize
+     * @param options - optional initial settings; all default to "off"/empty
+     */
+    constructor(dataset: Dataset, options: SerializationOptions = {}) {
+        this.config = {
+            model: dataset,
+            excludeEmpty: options.excludeEmptyFields ?? false,
+            baseUri: options.baseUri ?? '',
+            excludeFields: options.excludeFields ?? [],
+        };
+
+        if (options.enableCaching) {
+            this.cachingEnabled = true;
+        }
+    }
+
+    /**
+     * Enable caching for XML generation
+     * When enabled, freshly generated XML is saved to the DocumentXmlCache record
+     */
+    public enableCaching(): this {
+        this.cachingEnabled = true;
+        return this;
+    }
+
+    /**
+     * Disable caching for XML generation
+     */
+    public disableCaching(): this {
+        this.cachingEnabled = false;
+        return this;
+    }
+
+    /**
+     * Replace the dataset that will be serialized
+     */
+    set model(model: Dataset) {
+        this.config.model = model;
+    }
+
+    /**
+     * Configure to exclude empty fields from XML output
+     */
+    public excludeEmptyFields(): this {
+        this.config.excludeEmpty = true;
+        return this;
+    }
+
+    /**
+     * Set the cache instance directly (useful when preloading)
+     * @param cache - The DocumentXmlCache instance
+     */
+    public setCache(cache: DocumentXmlCache): this {
+        this.cache = cache;
+        return this;
+    }
+
+    /**
+     * Get the current cache instance
+     */
+    public getCache(): DocumentXmlCache | null {
+        return this.cache;
+    }
+
+    /**
+     * Get the XML document for the dataset.
+     * Returns the cached document when a cache instance is set and still valid;
+     * otherwise generates a fresh document, stores it when caching is enabled and
+     * returns its Rdr_Dataset node.
+     *
+     * @returns the XML document, or null when generation produced nothing
+     */
+    public async toXmlDocument(): Promise<XMLBuilder | null> {
+        const dataset = this.config.model;
+
+        // Try to get from cache first
+        const cachedDocument = await this.retrieveFromCache();
+
+        if (cachedDocument) {
+            logger.debug(`Using cached XML for dataset ${dataset.id}`);
+            return cachedDocument;
+        }
+
+        // Generate fresh document
+        logger.debug(`[DatasetXmlSerializer] Cache miss - generating fresh XML for dataset ${dataset.id}`);
+        const freshDocument = await this.createStrategy().createDomDocument();
+
+        if (!freshDocument) {
+            logger.error(`[DatasetXmlSerializer] Failed to generate XML for dataset ${dataset.id}`);
+            return null;
+        }
+
+        // Cache if caching is enabled
+        if (this.cachingEnabled) {
+            await this.persistToCache(freshDocument, dataset);
+        }
+
+        // Extract the dataset-specific node
+        return this.extractDatasetNode(freshDocument);
+    }
+
+    /**
+     * Generate XML string representation
+     * Convenience method that converts XMLBuilder to string
+     */
+    public async toXmlString(): Promise<string | null> {
+        const document = await this.toXmlDocument();
+        return document ? document.end({ prettyPrint: false }) : null;
+    }
+
+    /**
+     * Build the Strategy from the current configuration.
+     * Created on demand so that excludeEmptyFields() and the model setter, which
+     * are called after construction, are reflected in the generated XML.
+     */
+    private createStrategy(): Strategy {
+        return new Strategy({
+            excludeEmpty: this.config.excludeEmpty,
+            baseUri: this.config.baseUri,
+            excludeFields: this.config.excludeFields,
+            model: this.config.model,
+        });
+    }
+
+    /**
+     * Persist generated XML document to cache
+     * Best-effort: the save is awaited, but failures are only logged and never
+     * propagated to the caller
+     */
+    private async persistToCache(domDocument: XMLBuilder, dataset: Dataset): Promise<void> {
+        try {
+            // Reuse the cache record set via setCache(), otherwise start a new one
+            this.cache = this.cache || new DocumentXmlCache();
+            this.cache.document_id = dataset.id;
+            this.cache.xml_version = 1;
+            this.cache.server_date_modified = dataset.server_date_modified.toFormat('yyyy-MM-dd HH:mm:ss');
+            this.cache.xml_data = domDocument.end();
+
+            await this.cache.save();
+            logger.debug(`Cached XML for dataset ${dataset.id}`);
+        } catch (error) {
+            logger.error(`Failed to cache XML for dataset ${dataset.id}: ${error.message}`);
+            // Don't throw - caching failure shouldn't break the flow
+        }
+    }
+
+    /**
+     * Extract the Rdr_Dataset node from full document
+     * Falls back to the full document when no such node is found
+     */
+    private extractDatasetNode(domDocument: XMLBuilder): XMLBuilder | null {
+        const node = domDocument.find((n) => n.node.nodeName === 'Rdr_Dataset', false, true)?.node;
+
+        if (node) {
+            return builder({ version: '1.0', encoding: 'UTF-8', standalone: true }, node);
+        }
+
+        return domDocument;
+    }
+
+    /**
+     * Attempt to retrieve valid cached XML document
+     * Returns null if no cache instance is set, the cache entry is stale, or
+     * reading the cached document fails
+     */
+    private async retrieveFromCache(): Promise<XMLBuilder | null> {
+        const dataset: Dataset = this.config.model;
+        const cache = this.cache;
+        if (!cache) {
+            return null;
+        }
+
+        // Check if cache is still valid
+        const actuallyCached = await DocumentXmlCache.hasValidEntry(dataset.id, dataset.server_date_modified);
+
+        if (!actuallyCached) {
+            logger.debug(`Cache invalid for dataset ${dataset.id}`);
+            return null;
+        }
+
+        // Cache is up to date: return the cached document
+        try {
+            // await inside the try so an asynchronous failure is caught below
+            return await cache.getDomDocument();
+        } catch (error) {
+            logger.error(`Failed to retrieve cached document for dataset ${dataset.id}: ${error.message}`);
+            return null;
+        }
+    }
+}
--- a/app/Library/Utils/Index.ts
+++ b/app/Library/Utils/Index.ts
@ -2,7 +2,7 @@ import Dataset from '#models/dataset';
 import { Client } from '@opensearch-project/opensearch';
 import { create } from 'xmlbuilder2';
 import SaxonJS from 'saxon-js';
-import XmlModel from '#app/Library/XmlModel';
+import DatasetXmlSerializer from '#app/Library/DatasetXmlSerializer';
 import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
 import logger from '@adonisjs/core/services/logger';
 import { readFileSync } from 'fs';
@ -72,31 +72,42 @@ export default {
        }
    },

+    /**
+     * Index a dataset document to OpenSearch/Elasticsearch.
+     *
+     * Reads the compiled stylesheet 'public/assets2/solr.sef.json', transforms the
+     * dataset to a JSON string via `getTransformedString()`, parses it and sends it
+     * to `this.client.index()` under the dataset's `publish_id`.
+     *
+     * Any failure is logged and then re-thrown, so callers must handle it (before
+     * this change errors were only logged). This includes the `JSON.parse` error
+     * raised when `getTransformedString()` returns its '' fallback.
+     *
+     * @param dataset - Dataset to index; its `publish_id` is used as the document id.
+     * @param index_name - Name of the target index.
+     * @throws Whatever error occurred while reading, transforming, parsing or indexing.
+     */
    async indexDocument(dataset: Dataset, index_name: string): Promise<void> {
        try {
-            const proc = readFileSync('public/assets2/solr.sef.json');
-            const doc: string = await this.getTransformedString(dataset, proc);
+            // Load XSLT transformation file
+            const xsltProc = readFileSync('public/assets2/solr.sef.json');

-            let document = JSON.parse(doc);
+            // Transform dataset to JSON document
+            const jsonDoc: string = await this.getTransformedString(dataset, xsltProc);
+
+            const document = JSON.parse(jsonDoc);
+
+            // Index the parsed JSON document into OpenSearch
            await this.client.index({
                id: dataset.publish_id?.toString(),
                index: index_name,
                body: document,
-                refresh: true,
+                refresh: true, // make immediately searchable
            });
-            logger.info(`dataset with publish_id ${dataset.publish_id} successfully indexed`);
+            logger.info(`Dataset ${dataset.publish_id} successfully indexed to ${index_name}`);
        } catch (error) {
-            logger.error(`An error occurred while indexing datsaet with publish_id ${dataset.publish_id}.`);
+            logger.error(`Failed to index dataset ${dataset.publish_id}: ${error.message}`);
+            throw error; // Re-throw to allow caller to handle
        }
    },

+    /**
+     * Transform dataset XML to JSON using XSLT
+     */
    async getTransformedString(dataset: Dataset, proc: Buffer): Promise<string> {
-        let xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
-        const datasetNode = xml.root().ele('Dataset');
-        await createXmlRecord(dataset, datasetNode);
-        const xmlString = xml.end({ prettyPrint: false });
+        // Generate XML string from dataset
+        const xmlString = await this.generateDatasetXml(dataset);

        try {
+            // Apply XSLT transformation
            const result = await SaxonJS.transform({
                stylesheetText: proc,
                destination: 'serialized',
@ -108,6 +119,18 @@ export default {
            return '';
        }
    },
+
+    /**
+     * Generate an XML string from the dataset model.
+     *
+     * Creates a `<root>` document, appends a `Dataset` element to it and lets
+     * `createXmlRecord()` import the dataset's XML into that element. The result
+     * is serialized without pretty-printing.
+     *
+     * @param dataset - Dataset to serialize.
+     * @returns The serialized XML document as a single-line string.
+     * @throws Propagates any error raised by `createXmlRecord()`; nothing is caught here.
+     */
+    async generateDatasetXml(dataset: Dataset): Promise<string> {
+        const xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
+        const datasetNode = xml.root().ele('Dataset');
+
+        await createXmlRecord(dataset, datasetNode);
+
+        return xml.end({ prettyPrint: false });
+    },
 };
 /**
 * Return the default global focus trap stack
@ -115,74 +138,49 @@ export default {
 * @return {import('focus-trap').FocusTrap[]}
 */

-// export const indexDocument = async (dataset: Dataset, index_name: string, proc: Buffer): Promise<void> => {
-//     try {
-//         const doc = await getJsonString(dataset, proc);
-
-//         let document = JSON.parse(doc);
-//         await client.index({
-//             id: dataset.publish_id?.toString(),
-//             index: index_name,
-//             body: document,
-//             refresh: true,
-//         });
-//         Logger.info(`dataset with publish_id ${dataset.publish_id} successfully indexed`);
-//     } catch (error) {
-//         Logger.error(`An error occurred while indexing datsaet with publish_id ${dataset.publish_id}.`);
-//     }
-// };
-
-// const getJsonString = async (dataset, proc): Promise<string> => {
-//     let xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
-//     const datasetNode = xml.root().ele('Dataset');
-//     await createXmlRecord(dataset, datasetNode);
-//     const xmlString = xml.end({ prettyPrint: false });
-
-//     try {
-//         const result = await transform({
-//             stylesheetText: proc,
-//             destination: 'serialized',
-//             sourceText: xmlString,
-//         });
-//         return result.principalResult;
-//     } catch (error) {
-//         Logger.error(`An error occurred while creating the user, error: ${error.message},`);
-//         return '';
-//     }
-// };
-
+/**
+ * Create the complete XML record for a dataset and import it into `datasetNode`.
+ *
+ * The DOM node comes from `getDatasetXmlDomNode()` (which enables serializer
+ * caching) and is enriched before import with:
+ *  - a `landingpage` attribute, only when the dataset has a `publish_id`;
+ *  - a `SetSpec` element `data-type:<type>`;
+ *  - one `SetSpec` element `<oai_name>:<number>` per collection.
+ *
+ * NOTE(review): assumes every collection has its `collectionRole` relation
+ * loaded; `collRole.oai_name` would throw otherwise - confirm against callers.
+ *
+ * @param dataset - Dataset to serialize.
+ * @param datasetNode - Target node that receives the imported dataset XML.
+ * @throws Error when no DOM node could be generated (previously this case was
+ *         skipped silently).
+ */
 const createXmlRecord = async (dataset: Dataset, datasetNode: XMLBuilder): Promise<void> => {
    const domNode = await getDatasetXmlDomNode(dataset);
-    if (domNode) {
-         // add frontdoor url and data-type
-        dataset.publish_id && addLandingPageAttribute(domNode, dataset.publish_id.toString());
-        addSpecInformation(domNode, 'data-type:' + dataset.type);
-        if (dataset.collections) {
-            for (const coll of dataset.collections) {
-                const collRole = coll.collectionRole;
-                addSpecInformation(domNode, collRole.oai_name + ':' + coll.number);
-            }
-        }

-        datasetNode.import(domNode);
+    if (!domNode) {
+        throw new Error(`Failed to generate XML DOM node for dataset ${dataset.id}`);
    }
+
+    // Enrich with landing page URL
+    if (dataset.publish_id) {
+        addLandingPageAttribute(domNode, dataset.publish_id.toString());
+    }
+
+    // Add data type specification
+    addSpecInformation(domNode, `data-type:${dataset.type}`);
+
+    // Add collection information
+    if (dataset.collections) {
+        for (const coll of dataset.collections) {
+            const collRole = coll.collectionRole;
+            addSpecInformation(domNode, `${collRole.oai_name}:${coll.number}`);
+        }
+    }
+
+    datasetNode.import(domNode);
 };

 const getDatasetXmlDomNode = async (dataset: Dataset): Promise<XMLBuilder | null> => {
-    const xmlModel = new XmlModel(dataset);
+    const serializer = new DatasetXmlSerializer(dataset).enableCaching().excludeEmptyFields();
    // xmlModel.setModel(dataset);
-    xmlModel.excludeEmptyFields();
-    xmlModel.caching = true;
-    // const cache = dataset.xmlCache ? dataset.xmlCache : null;
-    // dataset.load('xmlCache');
+
+    // Load the xmlCache relation so an existing cache row can be handed to the serializer
    await dataset.load('xmlCache');
    if (dataset.xmlCache) {
-        xmlModel.xmlCache = dataset.xmlCache;
+        serializer.setCache(dataset.xmlCache);
    }

-    // return cache.getDomDocument();
-    const domDocument: XMLBuilder | null = await xmlModel.getDomDocument();
-    return domDocument;
+    // Build the XML document; the serializer may serve it from the cache attached above
+    const xmlDocument: XMLBuilder | null = await serializer.toXmlDocument();
+    return xmlDocument;
 };

 const addLandingPageAttribute = (domNode: XMLBuilder, dataid: string) => {
@ -192,6 +190,6 @@ const addLandingPageAttribute = (domNode: XMLBuilder, dataid: string) => {
    domNode.att('landingpage', url);
 };

-const addSpecInformation= (domNode: XMLBuilder, information: string) => {
+const addSpecInformation = (domNode: XMLBuilder, information: string) => {
    domNode.ele('SetSpec').att('Value', information);
-};
+};
--- a/app/Library/XmlModel.ts
+++ b/app/Library/XmlModel.ts
@ -1,129 +0,0 @@
-import DocumentXmlCache from '#models/DocumentXmlCache';
-import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
-import Dataset from '#models/dataset';
-import Strategy from './Strategy.js';
-import { DateTime } from 'luxon';
-import { builder } from 'xmlbuilder2';
-
-/**
- * This is the description of the interface
- *
- * @interface Conf
- * @member {Model} model holds the current dataset model
- * @member {XMLBuilder} dom holds the current DOM representation
- * @member {Array<string>} excludeFields List of fields to skip on serialization.
- * @member {boolean} excludeEmpty True, if empty fields get excluded from serialization.
- * @member {string} baseUri Base URI for xlink:ref elements
- */
-export interface Conf {
-    model: Dataset;
-    dom?: XMLBuilder;
-    excludeFields: Array<string>;
-    excludeEmpty: boolean;
-    baseUri: string;
-}
-
-export default class XmlModel {
-    private config: Conf;
-    // private strategy = null;
-    private cache: DocumentXmlCache | null = null;
-    private _caching = false;
-    private strategy: Strategy;
-
-    constructor(dataset: Dataset) {
-        // $this->strategy = new Strategy();// Opus_Model_Xml_Version1;
-        // $this->config = new Conf();
-        // $this->strategy->setup($this->config);
-
-        this.config = {
-            excludeEmpty: false,
-            baseUri: '',
-            excludeFields: [],
-            model: dataset,
-        };
-
-        this.strategy = new Strategy({
-            excludeEmpty: true,
-            baseUri: '',
-            excludeFields: [],
-            model: dataset,
-        });
-    }
-
-    set model(model: Dataset) {
-        this.config.model = model;
-    }
-
-    public excludeEmptyFields(): void {
-        this.config.excludeEmpty = true;
-    }
-
-    get xmlCache(): DocumentXmlCache | null {
-        return this.cache;
-    }
-
-    set xmlCache(cache: DocumentXmlCache) {
-        this.cache = cache;
-    }
-
-    get caching(): boolean {
-        return this._caching;
-    }
-    set caching(caching: boolean) {
-        this._caching = caching;
-    }
-
-    public async getDomDocument(): Promise<XMLBuilder | null> {
-        const dataset = this.config.model;
-
-        let domDocument: XMLBuilder | null = await this.getDomDocumentFromXmlCache();
-        if (domDocument == null) {
-            domDocument = await this.strategy.createDomDocument();
-            //     domDocument = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
-            if (this._caching) {
-                // caching is desired:
-                this.cache = this.cache || new DocumentXmlCache();
-                this.cache.document_id = dataset.id;
-                this.cache.xml_version = 1; // (int)$this->strategy->getVersion();
-                this.cache.server_date_modified = dataset.server_date_modified.toFormat('yyyy-MM-dd HH:mm:ss');
-                this.cache.xml_data = domDocument.end();
-                await this.cache.save();
-            }
-            const node = domDocument.find(
-                (n) => {
-                    const test = n.node.nodeName == 'Rdr_Dataset';
-                    return test;
-                },
-                false,
-                true,
-            )?.node;
-            if (node != undefined) {
-                domDocument = builder({ version: '1.0', encoding: 'UTF-8', standalone: true }, node);
-            }
-        }
-        return domDocument;
-    }
-
-    private async getDomDocumentFromXmlCache(): Promise<XMLBuilder | null> {
-        const dataset: Dataset = this.config.model;
-        if (!this.cache) {
-            return null;
-        }
-        //.toFormat('YYYY-MM-DD HH:mm:ss');
-        let date: DateTime = dataset.server_date_modified;
-        const actuallyCached: boolean = await DocumentXmlCache.hasValidEntry(dataset.id, date);
-        if (!actuallyCached) {
-            return null;
-        }
-        //cache is actual return it for oai:
-        try {
-            if (this.cache) {
-                return this.cache.getDomDocument();
-            } else {
-                return null;
-            }
-        } catch (error) {
-            return null;
-        }
-    }
-}
--- a/app/models/DocumentXmlCache.ts
+++ b/app/models/DocumentXmlCache.ts
@ -4,7 +4,8 @@ import { builder, create } from 'xmlbuilder2';
 import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
 import db from '@adonisjs/lucid/services/db';
 import { DateTime } from 'luxon';
-import type { BelongsTo } from "@adonisjs/lucid/types/relations";
+import type { BelongsTo } from '@adonisjs/lucid/types/relations';
+import logger from '@adonisjs/core/services/logger';

 export default class DocumentXmlCache extends BaseModel {
    public static namingStrategy = new SnakeCaseNamingStrategy();
@ -66,33 +67,38 @@ export default class DocumentXmlCache extends BaseModel {
    }

    /**
-     * Check if a dataset in a specific xml version is already cached or not.
+     * Check if a valid (non-stale) cache entry exists
+     * Cache is valid only if it was created AFTER the dataset's last modification
     *
-     * @param mixed datasetId
-     * @param mixed serverDateModified
-     * @returns {Promise<boolean>} Returns true on cached hit else false.
+     * @param datasetId - The dataset ID to check
+     * @param datasetServerDateModified - The dataset's last modification timestamp
+     * @returns true if valid cache exists, false otherwise
     */
-    // public static async hasValidEntry(datasetId: number, datasetServerDateModified: DateTime): Promise<boolean> {
-    //     // const formattedDate = dayjs(datasetServerDateModified).format('YYYY-MM-DD HH:mm:ss');
-
-    //     const query = Database.from(this.table)
-    //         .where('document_id', datasetId)
-    //         .where('server_date_modified', '2023-08-17 16:51:03')
-    //         .first();
-
-    //     const row = await query;
-    //     return !!row;
-    // }
-
-    // Assuming 'DocumentXmlCache' has a table with a 'server_date_modified' column in your database
    public static async hasValidEntry(datasetId: number, datasetServerDateModified: DateTime): Promise<boolean> {
        const serverDateModifiedString: string = datasetServerDateModified.toFormat('yyyy-MM-dd HH:mm:ss'); // Convert DateTime to ISO string
-        const query = db.from(this.table)
+
+        const row = await db
+            .from(this.table)
            .where('document_id', datasetId)
-            .where('server_date_modified', '>=', serverDateModifiedString) // Check if server_date_modified is newer or equal
+            .where('server_date_modified', '>', serverDateModifiedString) // Check if server_date_modified is newer or equal
            .first();

-        const row = await query;
-        return !!row;
+        const isValid = !!row;
+
+        if (isValid) {
+            logger.debug(`Valid cache found for dataset ${datasetId}`);
+        } else {
+            logger.debug(`No valid cache for dataset ${datasetId} (dataset modified: ${serverDateModifiedString})`);
+        }
+
+        return isValid;
+    }
+
+    /**
+     * Invalidate (delete) this cache entry.
+     *
+     * Deletes the model instance via `this.delete()` and then writes a debug log
+     * line containing the entry's `document_id`.
+     *
+     * @returns Resolves once the delete call has completed.
+     */
+    public async invalidate(): Promise<void> {
+        await this.delete();
+        logger.debug(`Invalidated cache for document ${this.document_id}`);
    }
 }
--- a/app/validators/dataset.ts
+++ b/app/validators/dataset.ts
@ -55,8 +55,8 @@ export const createDatasetValidator = vine.compile(
                        .translatedLanguage({ mainLanguageField: 'language', typeField: 'type' }),
                }),
            )
-           // .minLength(1),
-           .arrayContainsTypes({ typeA: 'abstract', typeB: 'translated' }),
+            // .minLength(1),
+            .arrayContainsTypes({ typeA: 'abstract', typeB: 'translated' }),
        authors: vine
            .array(
                vine.object({
@ -160,7 +160,8 @@ export const createDatasetValidator = vine.compile(
                    .fileScan({ removeInfected: true }),
            )
            .minLength(1),
-    }),);
+    }),
+);

 /**
 * Validates the dataset's update action
@ -309,11 +310,13 @@ export const updateDatasetValidator = vine.compile(
                    .fileScan({ removeInfected: true }),
            )
            .dependentArrayMinLength({ dependentArray: 'fileInputs', min: 1 }),
-        fileInputs: vine.array(
-            vine.object({
-                label: vine.string().trim().maxLength(100),               
-            }),
-        ).optional(),
+        fileInputs: vine
+            .array(
+                vine.object({
+                    label: vine.string().trim().maxLength(100),
+                }),
+            )
+            .optional(),
    }),
 );

@ -501,7 +504,7 @@ let messagesProvider = new SimpleMessagesProvider({
    'files.array.minLength': 'At least {{ min }}  file upload is required.',
    'files.*.size': 'file size is to big',
    'files.*.extnames': 'file extension is not supported',
-     'embargo_date.date.afterOrEqual': `Embargo date must be on or after ${dayjs().add(10, 'day').format('DD.MM.YYYY')}`,
+    'embargo_date.date.afterOrEqual': `Embargo date must be on or after ${dayjs().add(10, 'day').format('DD.MM.YYYY')}`,
 });

 createDatasetValidator.messagesProvider = messagesProvider;
--- a/app/validators/role.ts
+++ b/app/validators/role.ts
@ -8,20 +8,20 @@ export const createRoleValidator = vine.compile(
    vine.object({
        name: vine
            .string()
-            .isUnique({ table: 'roles', column: 'name' })
            .trim()
            .minLength(3)
            .maxLength(255)
-            .regex(/^[a-zA-Z0-9]+$/), //Must be alphanumeric with hyphens or underscores
+            .isUnique({ table: 'roles', column: 'name' })
+            .regex(/^[a-zA-Z0-9]+$/), // Must be alphanumeric
        display_name: vine
            .string()
-            .isUnique({ table: 'roles', column: 'display_name' })
            .trim()
            .minLength(3)
            .maxLength(255)
+            .isUnique({ table: 'roles', column: 'display_name' })
            .regex(/^[a-zA-Z0-9]+$/),
        description: vine.string().trim().escape().minLength(3).maxLength(255).optional(),
-        permissions: vine.array(vine.number()).minLength(1), // define at least one permission for the new role
+        permissions: vine.array(vine.number()).minLength(1), // At least one permission required
    }),
 );

@ -29,21 +29,28 @@ export const updateRoleValidator = vine.withMetaData<{ roleId: number }>().compi
    vine.object({
        name: vine
            .string()
-            // .unique(async (db, value, field) => {
-            //     const result = await db.from('roles').select('id').whereNot('id', field.meta.roleId).where('name', value).first();
-            //     return result.length ? false : true;
-            // })
+            .trim()
+            .minLength(3)
+            .maxLength(255)
            .isUnique({
                table: 'roles',
                column: 'name',
                whereNot: (field) => field.meta.roleId,
            })
+            .regex(/^[a-zA-Z0-9]+$/),
+        display_name: vine
+            .string()
            .trim()
            .minLength(3)
-            .maxLength(255),
-
+            .maxLength(255)
+            .isUnique({
+                table: 'roles',
+                column: 'display_name',
+                whereNot: (field) => field.meta.roleId,
+            })
+            .regex(/^[a-zA-Z0-9]+$/),
        description: vine.string().trim().escape().minLength(3).maxLength(255).optional(),
-        permissions: vine.array(vine.number()).minLength(1), // define at least one permission for the new role
+        permissions: vine.array(vine.number()).minLength(1), // At least one permission required
    }),
 );