- feat: Enhance README with setup instructions, usage, and command documentation

- fix: Update API routes to include DOI URL handling and improve route organization

- chore: Add ORCID preload rule file and ensure proper registration

- docs: Add MIT License to the project for open-source compliance

- feat: Implement command to detect and fix missing dataset cross-references

- feat: Create command for updating DataCite DOI records with detailed logging and error handling

- docs: Add comprehensive documentation for dataset indexing command

- docs: Create detailed documentation for DataCite update command with usage examples and error handling
commit c049b22723
Kaimbacher, 2025-09-19 14:35:23 +02:00
11 changed files with 2187 additions and 555 deletions

LICENSE Normal file
@@ -0,0 +1,22 @@
MIT License
Copyright (c) 2025 Tethys Research Repository
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

app/Controllers/Http/Api/DatasetController.ts
@@ -1,23 +1,35 @@
import type { HttpContext } from '@adonisjs/core/http';
import Dataset from '#models/dataset';
import { StatusCodes } from 'http-status-codes';

// node ace make:controller Author
export default class DatasetController {
    /**
     * GET /api/datasets
     * Find all published datasets
     */
    public async index({ response }: HttpContext) {
        try {
            const datasets = await Dataset.query()
                .where(function (query) {
                    query.where('server_state', 'published').orWhere('server_state', 'deleted');
                })
                .preload('titles')
                .preload('identifier')
                .orderBy('server_date_published', 'desc');
            return response.status(StatusCodes.OK).json(datasets);
        } catch (error) {
            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: error.message || 'Some error occurred while retrieving datasets.',
            });
        }
    }

    /**
     * GET /api/dataset
     * Find all published datasets
     */
    public async findAll({ response }: HttpContext) {
        try {
            const datasets = await Dataset.query()
@@ -33,48 +45,142 @@ export default class DatasetController
        }
    }

    /**
     * GET /api/dataset/:publish_id
     * Find one dataset by publish_id
     */
    public async findOne({ response, params }: HttpContext) {
        try {
            const dataset = await Dataset.query()
                .where('publish_id', params.publish_id)
                .preload('titles')
                .preload('descriptions') // Using 'descriptions' instead of 'abstracts'
                .preload('user', (builder) => {
                    builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
                })
                .preload('authors', (builder) => {
                    builder
                        .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
                        .withCount('datasets', (query) => {
                            query.as('datasets_count');
                        })
                        .pivotColumns(['role', 'sort_order'])
                        .orderBy('pivot_sort_order', 'asc');
                })
                .preload('contributors', (builder) => {
                    builder
                        .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
                        .withCount('datasets', (query) => {
                            query.as('datasets_count');
                        })
                        .pivotColumns(['role', 'sort_order', 'contributor_type'])
                        .orderBy('pivot_sort_order', 'asc');
                })
                .preload('subjects')
                .preload('coverage')
                .preload('licenses')
                .preload('references')
                .preload('project')
                .preload('referenced_by', (builder) => {
                    builder.preload('dataset', (builder) => {
                        builder.preload('identifier');
                    });
                })
                .preload('files', (builder) => {
                    builder.preload('hashvalues');
                })
                .preload('identifier')
                .first(); // Use first() instead of firstOrFail() to handle not found gracefully

            if (!dataset) {
                return response.status(StatusCodes.NOT_FOUND).json({
                    message: `Cannot find Dataset with publish_id=${params.publish_id}.`,
                });
            }

            return response.status(StatusCodes.OK).json(dataset);
        } catch (error) {
            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: error.message || `Error retrieving Dataset with publish_id=${params.publish_id}.`,
            });
        }
    }

    /**
     * GET /:prefix/:value
     * Find dataset by identifier (e.g., https://doi.tethys.at/10.24341/tethys.99.2)
     */
    public async findByIdentifier({ response, params }: HttpContext) {
        const identifierValue = `${params.prefix}/${params.value}`;

        // Optional: Validate DOI format
        if (!identifierValue.match(/^10\.\d+\/[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/)) {
            return response.status(StatusCodes.BAD_REQUEST).json({
                message: `Invalid DOI format: ${identifierValue}`,
            });
        }

        try {
            const dataset = await Dataset.query()
                // Alternative: subquery with whereIn
                // .whereIn('id', (subQuery) => {
                //     subQuery.select('dataset_id').from('dataset_identifiers').where('value', identifierValue);
                // })
                .whereHas('identifier', (builder) => {
                    builder.where('value', identifierValue);
                })
                .preload('titles')
                .preload('descriptions') // Using 'descriptions' instead of 'abstracts'
                .preload('user', (builder) => {
                    builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
                })
                .preload('authors', (builder) => {
                    builder
                        .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
                        .withCount('datasets', (query) => {
                            query.as('datasets_count');
                        })
                        .pivotColumns(['role', 'sort_order'])
                        .wherePivot('role', 'author')
                        .orderBy('pivot_sort_order', 'asc');
                })
                .preload('contributors', (builder) => {
                    builder
                        .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
                        .withCount('datasets', (query) => {
                            query.as('datasets_count');
                        })
                        .pivotColumns(['role', 'sort_order', 'contributor_type'])
                        .wherePivot('role', 'contributor')
                        .orderBy('pivot_sort_order', 'asc');
                })
                .preload('subjects')
                .preload('coverage')
                .preload('licenses')
                .preload('references')
                .preload('project')
                .preload('referenced_by', (builder) => {
                    builder.preload('dataset', (builder) => {
                        builder.preload('identifier');
                    });
                })
                .preload('files', (builder) => {
                    builder.preload('hashvalues');
                })
                .preload('identifier')
                .first();

            if (!dataset) {
                return response.status(StatusCodes.NOT_FOUND).json({
                    message: `Cannot find Dataset with identifier=${identifierValue}.`,
                });
            }

            return response.status(StatusCodes.OK).json(dataset);
        } catch (error) {
            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: error.message || `Error retrieving Dataset with identifier=${identifierValue}.`,
            });
        }
    }
}

app/Library/Doi/DoiClient.ts
@@ -1,6 +1,3 @@
import DoiClientContract from '#app/Library/Doi/DoiClientContract';
import DoiClientException from '#app/exceptions/DoiClientException';
import { StatusCodes } from 'http-status-codes';
@@ -12,14 +9,14 @@ export class DoiClient implements DoiClientContract
    public username: string;
    public password: string;
    public serviceUrl: string;
    public apiUrl: string;

    constructor() {
        // const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
        this.username = process.env.DATACITE_USERNAME || '';
        this.password = process.env.DATACITE_PASSWORD || '';
        this.serviceUrl = process.env.DATACITE_SERVICE_URL || '';
        this.apiUrl = process.env.DATACITE_API_URL || 'https://api.datacite.org';

        if (this.username === '' || this.password === '' || this.serviceUrl === '') {
            const message = 'Missing configuration settings to properly initialize DOI client';
@@ -90,4 +87,240 @@
            throw new DoiClientException(error.response.status, error.response.data);
        }
    }

    /**
     * Retrieves DOI information from the DataCite REST API
     *
     * @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
     * @returns Promise with DOI information or null if not found
     */
    public async getDoiInfo(doiValue: string): Promise<any | null> {
        try {
            // Use configurable DataCite REST API URL
            const dataciteApiUrl = `${this.apiUrl}/dois/${doiValue}`;
            const response = await axios.get(dataciteApiUrl, {
                headers: {
                    Accept: 'application/vnd.api+json',
                },
            });
            if (response.status === 200 && response.data.data) {
                return {
                    created: response.data.data.attributes.created,
                    registered: response.data.data.attributes.registered,
                    updated: response.data.data.attributes.updated,
                    published: response.data.data.attributes.published,
                    state: response.data.data.attributes.state,
                    url: response.data.data.attributes.url,
                    metadata: response.data.data.attributes,
                };
            }
        } catch (error) {
            if (error.response?.status === 404) {
                logger.debug(`DOI ${doiValue} not found in DataCite`);
                return null;
            }
            logger.debug(`DataCite REST API failed for ${doiValue}: ${error.message}`);
            // Fall back to the MDS API
            return await this.getDoiInfoFromMds(doiValue);
        }
        return null;
    }

    /**
     * Fallback method to get DOI info from the MDS API
     *
     * @param doiValue The DOI identifier
     * @returns Promise with basic DOI information or null
     */
    private async getDoiInfoFromMds(doiValue: string): Promise<any | null> {
        try {
            const auth = {
                username: this.username,
                password: this.password,
            };
            // Get DOI URL
            const doiResponse = await axios.get(`${this.serviceUrl}/doi/${doiValue}`, { auth });
            if (doiResponse.status === 200) {
                // Get metadata if available
                try {
                    const metadataResponse = await axios.get(`${this.serviceUrl}/metadata/${doiValue}`, {
                        auth,
                        headers: {
                            Accept: 'application/xml',
                        },
                    });
                    return {
                        url: doiResponse.data.trim(),
                        metadata: metadataResponse.data,
                        created: new Date().toISOString(), // MDS doesn't provide creation dates
                        registered: new Date().toISOString(), // Use current time as fallback
                        source: 'mds',
                    };
                } catch (metadataError) {
                    // Return basic info even if the metadata fetch fails
                    return {
                        url: doiResponse.data.trim(),
                        created: new Date().toISOString(),
                        registered: new Date().toISOString(),
                        source: 'mds',
                    };
                }
            }
        } catch (error) {
            if (error.response?.status === 404) {
                logger.debug(`DOI ${doiValue} not found in DataCite MDS`);
                return null;
            }
            logger.debug(`DataCite MDS API failed for ${doiValue}: ${error.message}`);
        }
        return null;
    }

    /**
     * Checks if a DOI exists in DataCite
     *
     * @param doiValue The DOI identifier
     * @returns Promise<boolean> True if the DOI exists
     */
    public async doiExists(doiValue: string): Promise<boolean> {
        const doiInfo = await this.getDoiInfo(doiValue);
        return doiInfo !== null;
    }

    /**
     * Gets the last modification date of a DOI
     *
     * @param doiValue The DOI identifier
     * @returns Promise<Date | null> Last modification date, or the registration/creation date if never updated; null if not found
     */
    public async getDoiLastModified(doiValue: string): Promise<Date | null> {
        const doiInfo = await this.getDoiInfo(doiValue);
        if (doiInfo) {
            // Use the updated date if available, otherwise fall back to the created/registered date
            const dateToUse = doiInfo.updated || doiInfo.registered || doiInfo.created;
            if (dateToUse) {
                logger.debug(
                    `DOI ${doiValue}: Using ${doiInfo.updated ? 'updated' : doiInfo.registered ? 'registered' : 'created'} date: ${dateToUse}`,
                );
                return new Date(dateToUse);
            }
        }
        return null;
    }

    /**
     * Makes a DOI unfindable (registered but not discoverable)
     * Note: DOIs cannot be deleted, only made unfindable
     * await doiClient.makeDoiUnfindable('10.21388/tethys.231');
     *
     * @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
     * @returns Promise<AxiosResponse<any>> The http response
     */
    public async makeDoiUnfindable(doiValue: string): Promise<AxiosResponse<any>> {
        const auth = {
            username: this.username,
            password: this.password,
        };
        try {
            // First, check if the DOI exists
            const exists = await this.doiExists(doiValue);
            if (!exists) {
                throw new DoiClientException(404, `DOI ${doiValue} not found`);
            }
            // Delete the DOI URL mapping to make it unfindable
            // This removes the URL but keeps the metadata registered
            const response = await axios.delete(`${this.serviceUrl}/doi/${doiValue}`, { auth });
            // Response codes for DELETE /doi/{doi}
            // 200 OK: operation successful
            // 401 Unauthorized: no login
            // 403 Forbidden: login problem, quota exceeded
            // 404 Not Found: DOI does not exist
            if (response.status !== 200) {
                const message = `Unexpected DataCite MDS response code ${response.status}`;
                logger.error(message);
                throw new DoiClientException(response.status, message);
            }
            logger.info(`DOI ${doiValue} successfully made unfindable`);
            return response;
        } catch (error) {
            logger.error(`Failed to make DOI ${doiValue} unfindable: ${error.message}`);
            if (error instanceof DoiClientException) {
                throw error;
            }
            throw new DoiClientException(error.response?.status || 500, error.response?.data || error.message);
        }
    }

    /**
     * Makes a DOI findable again by re-registering the URL
     * await doiClient.makeDoiFindable(
     *     '10.21388/tethys.231',
     *     'https://doi.dev.tethys.at/10.21388/tethys.231'
     * );
     *
     * @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
     * @param landingPageUrl The landing page URL
     * @returns Promise<AxiosResponse<any>> The http response
     */
    public async makeDoiFindable(doiValue: string, landingPageUrl: string): Promise<AxiosResponse<any>> {
        const auth = {
            username: this.username,
            password: this.password,
        };
        try {
            // Re-register the DOI with its URL to make it findable again
            const response = await axios.put(`${this.serviceUrl}/doi/${doiValue}`, `doi=${doiValue}\nurl=${landingPageUrl}`, { auth });
            // Response codes for PUT /doi/{doi}
            // 201 Created: operation successful
            // 400 Bad Request: request body must be exactly two lines: DOI and URL
            // 401 Unauthorized: no login
            // 403 Forbidden: login problem, quota exceeded
            // 412 Precondition failed: metadata must be uploaded first
            if (response.status !== 201) {
                const message = `Unexpected DataCite MDS response code ${response.status}`;
                logger.error(message);
                throw new DoiClientException(response.status, message);
            }
            logger.info(`DOI ${doiValue} successfully made findable again`);
            return response;
        } catch (error) {
            logger.error(`Failed to make DOI ${doiValue} findable: ${error.message}`);
            if (error instanceof DoiClientException) {
                throw error;
            }
            throw new DoiClientException(error.response?.status || 500, error.response?.data || error.message);
        }
    }

    /**
     * Gets the current state of a DOI (draft, registered, findable)
     * const state = await doiClient.getDoiState('10.21388/tethys.231');
     * console.log(`Current state: ${state}`); // 'findable'
     *
     * @param doiValue The DOI identifier
     * @returns Promise<string | null> The DOI state or null if not found
     */
    public async getDoiState(doiValue: string): Promise<string | null> {
        const doiInfo = await this.getDoiInfo(doiValue);
        return doiInfo?.state || null;
    }
}

commands/fix_dataset_cross_references.ts Normal file
@@ -0,0 +1,317 @@
/*
|--------------------------------------------------------------------------
| node ace make:command fix-dataset-cross-references
| DONE: create commands/fix_dataset_cross_references.ts
|--------------------------------------------------------------------------
*/
import { BaseCommand, flags } from '@adonisjs/core/ace';
import type { CommandOptions } from '@adonisjs/core/types/ace';
import Dataset from '#models/dataset';
import DatasetReference from '#models/dataset_reference';
// import env from '#start/env';
interface MissingCrossReference {
sourceDatasetId: number;
targetDatasetId: number;
sourcePublishId: number | null;
targetPublishId: number | null;
referenceType: string;
relation: string;
doi: string | null;
reverseRelation: string;
}
export default class DetectMissingCrossReferences extends BaseCommand {
static commandName = 'detect:missing-cross-references';
static description = 'Detect missing bidirectional cross-references between versioned datasets';
public static needsApplication = true;
@flags.boolean({ alias: 'f', description: 'Fix missing cross-references automatically' })
public fix: boolean = false;
@flags.boolean({ alias: 'v', description: 'Verbose output' })
public verbose: boolean = false;
public static options: CommandOptions = {
startApp: true,
staysAlive: false,
};
async run() {
this.logger.info('🔍 Detecting missing cross-references...');
try {
const missingReferences = await this.findMissingCrossReferences();
if (missingReferences.length === 0) {
this.logger.success('All cross-references are properly linked!');
return;
}
this.logger.warning(`Found ${missingReferences.length} missing cross-reference(s):`);
for (const missing of missingReferences) {
this.logger.info(
`Dataset ${missing.sourceDatasetId} references ${missing.targetDatasetId}, but reverse reference is missing`,
);
if (this.verbose) {
this.logger.info(` - Reference type: ${missing.referenceType}`);
this.logger.info(` - Relation: ${missing.relation}`);
this.logger.info(` - DOI: ${missing.doi}`);
}
}
if (this.fix) {
await this.fixMissingReferences(missingReferences);
this.logger.success('All missing cross-references have been fixed!');
} else {
this.printMissingReferencesList(missingReferences);
this.logger.info('💡 Run with --fix flag to automatically create missing cross-references');
}
} catch (error) {
this.logger.error('Error detecting missing cross-references:', error);
process.exit(1);
}
}
private async findMissingCrossReferences(): Promise<MissingCrossReference[]> {
const missingReferences: MissingCrossReference[] = [];
this.logger.info('📊 Querying dataset references...');
// Find all references that point to Tethys datasets (DOI or URL containing tethys DOI)
// Only from datasets that are published
const tethysReferences = await DatasetReference.query()
.whereIn('type', ['DOI', 'URL'])
.where((query) => {
query.where('value', 'like', '%doi.org/10.24341/tethys.%').orWhere('value', 'like', '%tethys.at/dataset/%');
})
.preload('dataset', (datasetQuery) => {
datasetQuery.where('server_state', 'published');
})
.whereHas('dataset', (datasetQuery) => {
datasetQuery.where('server_state', 'published');
});
this.logger.info(`🔗 Found ${tethysReferences.length} Tethys references from published datasets`);
let processedCount = 0;
for (const reference of tethysReferences) {
processedCount++;
if (this.verbose && processedCount % 10 === 0) {
this.logger.info(`📈 Processed ${processedCount}/${tethysReferences.length} references...`);
}
// Extract dataset publish_id from DOI or URL
const targetDatasetPublish = this.extractDatasetPublishIdFromReference(reference.value);
if (!targetDatasetPublish) {
if (this.verbose) {
this.logger.warning(`⚠️ Could not extract publish ID from: ${reference.value}`);
}
continue;
}
// Check if target dataset exists and is published
const targetDataset = await Dataset.query()
.where('publish_id', targetDatasetPublish)
.where('server_state', 'published')
.first();
if (!targetDataset) {
if (this.verbose) {
this.logger.warning(`⚠️ Target dataset with publish_id ${targetDatasetPublish} not found or not published`);
}
continue;
}
// Ensure we have a valid source dataset with proper preloading
if (!reference.dataset) {
this.logger.warning(`⚠️ Source dataset ${reference.document_id} not properly loaded, skipping...`);
continue;
}
// Check if reverse reference exists
const reverseReferenceExists = await this.checkReverseReferenceExists(
targetDataset.id,
reference.document_id,
reference.relation,
);
if (!reverseReferenceExists) {
missingReferences.push({
sourceDatasetId: reference.document_id,
targetDatasetId: targetDataset.id,
sourcePublishId: reference.dataset.publish_id || null,
targetPublishId: targetDataset.publish_id || null,
referenceType: reference.type,
relation: reference.relation,
doi: reference.value,
reverseRelation: this.getReverseRelation(reference.relation),
});
}
}
this.logger.info(`✅ Processed all ${processedCount} references`);
return missingReferences;
}
private extractDatasetPublishIdFromReference(value: string): number | null {
// Extract from DOI: https://doi.org/10.24341/tethys.107 -> 107
const doiMatch = value.match(/10\.24341\/tethys\.(\d+)/);
if (doiMatch) {
return parseInt(doiMatch[1]);
}
// Extract from URL: https://tethys.at/dataset/107 -> 107
const urlMatch = value.match(/tethys\.at\/dataset\/(\d+)/);
if (urlMatch) {
return parseInt(urlMatch[1]);
}
return null;
}
private async checkReverseReferenceExists(
sourceDatasetId: number,
targetDatasetId: number,
originalRelation: string,
): Promise<boolean> {
const reverseRelation = this.getReverseRelation(originalRelation);
// Only check for reverse references where the source dataset is also published
const reverseReference = await DatasetReference.query()
.where('document_id', sourceDatasetId)
.where('related_document_id', targetDatasetId)
.where('relation', reverseRelation)
.whereHas('dataset', (datasetQuery) => {
datasetQuery.where('server_state', 'published');
})
.first();
return !!reverseReference;
}
private getReverseRelation(relation: string): string {
const relationMap: Record<string, string> = {
IsNewVersionOf: 'IsPreviousVersionOf',
IsPreviousVersionOf: 'IsNewVersionOf',
IsVersionOf: 'HasVersion',
HasVersion: 'IsVersionOf',
Compiles: 'IsCompiledBy',
IsCompiledBy: 'Compiles',
IsVariantFormOf: 'IsOriginalFormOf',
IsOriginalFormOf: 'IsVariantFormOf',
IsPartOf: 'HasPart',
HasPart: 'IsPartOf',
IsSupplementTo: 'IsSupplementedBy',
IsSupplementedBy: 'IsSupplementTo',
Continues: 'IsContinuedBy',
IsContinuedBy: 'Continues',
};
// Fall back to 'HasVersion' for relation types that are not covered by the map
return relationMap[relation] || 'HasVersion';
}
private printMissingReferencesList(missingReferences: MissingCrossReference[]) {
console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
console.log('│ MISSING CROSS-REFERENCES REPORT │');
console.log('│ (Published Datasets Only) │');
console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
console.log();
missingReferences.forEach((missing, index) => {
console.log(
`${index + 1}. Dataset ${missing.sourceDatasetId} (Publish ID: ${missing.sourcePublishId}) → Dataset ${missing.targetDatasetId} (Publish ID: ${missing.targetPublishId})`,
);
console.log(` ├─ Current relation: "${missing.relation}"`);
console.log(` ├─ Missing reverse relation: "${missing.reverseRelation}"`);
console.log(` ├─ Reference type: ${missing.referenceType}`);
console.log(` └─ DOI/URL: ${missing.doi}`);
console.log();
});
console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
console.log(`│ SUMMARY: ${missingReferences.length} missing reverse reference(s) detected │`);
console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
}
private async fixMissingReferences(missingReferences: MissingCrossReference[]) {
this.logger.info('🔧 Creating missing cross-references in database...');
let fixedCount = 0;
let errorCount = 0;
for (const [index, missing] of missingReferences.entries()) {
try {
// Get the source dataset to create proper reference - ensure it's published
const sourceDataset = await Dataset.query()
.where('id', missing.sourceDatasetId)
.where('server_state', 'published')
.preload('identifier')
.first();
if (!sourceDataset) {
this.logger.warning(`⚠️ Source dataset ${missing.sourceDatasetId} not found or not published, skipping...`);
errorCount++;
continue;
}
// Create the reverse reference
const reverseReference = new DatasetReference();
reverseReference.document_id = missing.targetDatasetId;
reverseReference.related_document_id = missing.sourceDatasetId;
reverseReference.type = 'DOI';
reverseReference.relation = missing.reverseRelation;
// Use the source dataset's DOI for the value
if (sourceDataset.identifier?.value) {
reverseReference.value = `https://doi.org/${sourceDataset.identifier.value}`;
} else {
// Fallback to dataset URL if no DOI
reverseReference.value = `https://tethys.at/dataset/${sourceDataset.publish_id || missing.sourceDatasetId}`;
}
// Use the source dataset's main title for the label
reverseReference.label = sourceDataset.mainTitle || `Dataset ${missing.sourceDatasetId}`;
await reverseReference.save();
fixedCount++;
if (this.verbose) {
this.logger.info(
`✅ [${index + 1}/${missingReferences.length}] Created reverse reference: Dataset ${missing.targetDatasetId} -> ${missing.sourceDatasetId}`,
);
} else if ((index + 1) % 10 === 0) {
this.logger.info(`📈 Fixed ${fixedCount}/${missingReferences.length} references...`);
}
} catch (error) {
this.logger.error(
`❌ Error creating reverse reference for datasets ${missing.targetDatasetId} -> ${missing.sourceDatasetId}:`,
error,
);
errorCount++;
}
}
this.logger.info(`📊 Fix completed: ${fixedCount} created, ${errorCount} errors`);
}
}

commands/update_datacite.ts Normal file
@@ -0,0 +1,271 @@
/*
|--------------------------------------------------------------------------
| node ace make:command update-datacite
| DONE: create commands/update_datacite.ts
|--------------------------------------------------------------------------
*/
import { BaseCommand, flags } from '@adonisjs/core/ace';
import { CommandOptions } from '@adonisjs/core/types/ace';
import Dataset from '#models/dataset';
import { DoiClient } from '#app/Library/Doi/DoiClient';
import DoiClientException from '#app/exceptions/DoiClientException';
import Index from '#app/Library/Utils/Index';
import env from '#start/env';
import logger from '@adonisjs/core/services/logger';
import { DateTime } from 'luxon';
import { getDomain } from '#app/utils/utility-functions';
export default class UpdateDatacite extends BaseCommand {
static commandName = 'update:datacite';
static description = 'Update DataCite DOI records for published datasets';
public static needsApplication = true;
@flags.number({ alias: 'p', description: 'Specific publish_id to update' })
public publish_id: number;
@flags.boolean({ alias: 'f', description: 'Force update all records regardless of modification date' })
public force: boolean = false;
@flags.boolean({ alias: 'd', description: 'Dry run - show what would be updated without making changes' })
public dryRun: boolean = false;
@flags.boolean({ alias: 's', description: 'Show detailed stats for each dataset that needs updating' })
public stats: boolean = false;
//example: node ace update:datacite -p 123 --force --dry-run
public static options: CommandOptions = {
startApp: true, // Whether to boot the application before running the command
staysAlive: false, // Whether to keep the process alive after the command has executed
};
async run() {
logger.info('Starting DataCite update process...');
const prefix = env.get('DATACITE_PREFIX', '');
const base_domain = env.get('BASE_DOMAIN', '');
const apiUrl = env.get('DATACITE_API_URL', 'https://api.datacite.org');
if (!prefix || !base_domain) {
logger.error('Missing DATACITE_PREFIX or BASE_DOMAIN environment variables');
return;
}
logger.info(`Using DataCite API: ${apiUrl}`);
const datasets = await this.getDatasets();
logger.info(`Found ${datasets.length} datasets to process`);
let updated = 0;
let skipped = 0;
let errors = 0;
for (const dataset of datasets) {
try {
const shouldUpdate = this.force || (await this.shouldUpdateDataset(dataset));
if (this.stats) {
// Stats mode: show detailed information for datasets that need updating
if (shouldUpdate) {
await this.showDatasetStats(dataset);
updated++;
} else {
skipped++;
}
continue;
}
if (!shouldUpdate) {
logger.info(`Dataset ${dataset.publish_id}: Up to date, skipping`);
skipped++;
continue;
}
if (this.dryRun) {
logger.info(`Dataset ${dataset.publish_id}: Would update DataCite record (dry run)`);
updated++;
continue;
}
await this.updateDataciteRecord(dataset, prefix, base_domain);
logger.info(`Dataset ${dataset.publish_id}: Successfully updated DataCite record`);
updated++;
} catch (error) {
logger.error(`Dataset ${dataset.publish_id}: Failed to update - ${error.message}`);
errors++;
}
}
if (this.stats) {
logger.info(`\nDataCite Stats Summary: ${updated} datasets need updating, ${skipped} are up to date`);
} else {
logger.info(`DataCite update completed. Updated: ${updated}, Skipped: ${skipped}, Errors: ${errors}`);
}
}
private async getDatasets(): Promise<Dataset[]> {
const query = Dataset.query()
.preload('identifier')
.preload('xmlCache')
.where('server_state', 'published')
.whereHas('identifier', (identifierQuery) => {
identifierQuery.where('type', 'doi');
});
if (this.publish_id) {
query.where('publish_id', this.publish_id);
}
return await query.exec();
}
private async shouldUpdateDataset(dataset: Dataset): Promise<boolean> {
try {
// Check if dataset has a DOI identifier (HasOne relationship)
let doiIdentifier = dataset.identifier;
if (!doiIdentifier) {
// Try to load the relationship if not already loaded
await dataset.load('identifier');
doiIdentifier = dataset.identifier;
}
if (!doiIdentifier || doiIdentifier.type !== 'doi') {
logger.warn(`Dataset ${dataset.publish_id}: No DOI identifier found`);
return false;
}
// Validate dataset modification date
const datasetModified = dataset.server_date_modified;
const now = DateTime.now();
if (!datasetModified) {
logger.error(`Dataset ${dataset.publish_id}: server_date_modified is null or undefined`);
return true; // Update anyway if modification date is missing
}
if (datasetModified > now) {
logger.error(
`Dataset ${dataset.publish_id}: server_date_modified (${datasetModified.toISO()}) is in the future! ` +
`Current time: ${now.toISO()}. This indicates a data integrity issue. Skipping update.`,
);
return false; // Do not update when modification date is invalid
}
// Get DOI information from DataCite using DoiClient
const doiClient = new DoiClient();
const doiLastModified = await doiClient.getDoiLastModified(doiIdentifier.value);
if (!doiLastModified) {
logger.warn(`Dataset ${dataset.publish_id}: Could not retrieve DOI modification date from DataCite`);
return true; // Update anyway if we can't get DOI info
}
// Compare dataset modification date with DOI modification date
const doiModified = DateTime.fromJSDate(doiLastModified);
logger.debug(
`Dataset ${dataset.publish_id}: Dataset modified: ${datasetModified.toISO()}, DOI modified: ${doiModified.toISO()}`,
);
// Update if dataset was modified after the DOI record
return datasetModified > doiModified;
} catch (error) {
logger.warn(`Error checking update status for dataset ${dataset.publish_id}: ${error.message}`);
return true; // Update anyway if we can't determine status
}
}
private async updateDataciteRecord(dataset: Dataset, prefix: string, base_domain: string): Promise<void> {
try {
// Get the DOI identifier (HasOne relationship)
let doiIdentifier = dataset.identifier;
if (!doiIdentifier) {
await dataset.load('identifier');
doiIdentifier = dataset.identifier;
}
if (!doiIdentifier || doiIdentifier.type !== 'doi') {
throw new Error('No DOI identifier found for dataset');
}
// Generate XML metadata
const xmlMeta = (await Index.getDoiRegisterString(dataset)) as string;
if (!xmlMeta) {
throw new Error('Failed to generate XML metadata');
}
// Construct DOI value and landing page URL
const doiValue = doiIdentifier.value; // Use existing DOI value
const landingPageUrl = `https://doi.${getDomain(base_domain)}/${doiValue}`;
// Update DataCite record
const doiClient = new DoiClient();
const dataciteResponse = await doiClient.registerDoi(doiValue, xmlMeta, landingPageUrl);
if (dataciteResponse?.status === 201) {
// // Update dataset modification date
// dataset.server_date_modified = DateTime.now();
// await dataset.save();
// // Update search index
// const index_name = 'tethys-records';
// await Index.indexDocument(dataset, index_name);
logger.debug(`Dataset ${dataset.publish_id}: DataCite record updated successfully`);
} else {
throw new DoiClientException(
dataciteResponse?.status || 500,
`Unexpected DataCite response code: ${dataciteResponse?.status}`,
);
}
} catch (error) {
if (error instanceof DoiClientException) {
throw error;
}
throw new Error(`Failed to update DataCite record: ${error.message}`);
}
}
/**
* Shows detailed statistics for a dataset that needs updating
*/
private async showDatasetStats(dataset: Dataset): Promise<void> {
try {
let doiIdentifier = dataset.identifier;
if (!doiIdentifier) {
await dataset.load('identifier');
doiIdentifier = dataset.identifier;
}
const doiValue = doiIdentifier?.value || 'N/A';
const doiStatus = doiIdentifier?.status || 'N/A';
const datasetModified = dataset.server_date_modified;
// Get DOI info from DataCite
const doiClient = new DoiClient();
const doiLastModified = await doiClient.getDoiLastModified(doiValue);
const doiState = await doiClient.getDoiState(doiValue);
console.log(`
Dataset ${dataset.publish_id}
DOI Value: ${doiValue}
DOI Status (DB): ${doiStatus}
DOI State (DataCite): ${doiState || 'Unknown'}
Dataset Modified: ${datasetModified ? datasetModified.toISO() : 'N/A'}
DOI Modified: ${doiLastModified ? DateTime.fromJSDate(doiLastModified).toISO() : 'N/A'}
Needs Update: YES - Dataset newer than DOI
`);
} catch (error) {
console.log(`
Dataset ${dataset.publish_id}
DOI Value: ${dataset.identifier?.value || 'N/A'}
Error: ${error.message}
Needs Update: YES - Error checking status
`);
}
}
}

docs/commands/index-datasets.md Normal file
@@ -0,0 +1,278 @@
# Dataset Indexing Command
AdonisJS Ace command for indexing and synchronizing published datasets with OpenSearch for search functionality.
## Overview
The `index:datasets` command processes published datasets and creates/updates corresponding search index documents in OpenSearch. It intelligently compares modification timestamps to only re-index datasets when necessary, optimizing performance while maintaining search index accuracy.
## Command Syntax
```bash
node ace index:datasets [options]
```
## Options
| Flag | Alias | Description |
|------|-------|-------------|
| `--publish_id <number>` | `-p` | Index a specific dataset by publish_id |
## Usage Examples
### Basic Operations
```bash
# Index all published datasets that have been modified since last indexing
node ace index:datasets
# Index a specific dataset by publish_id
node ace index:datasets --publish_id 231
node ace index:datasets -p 231
```
## How It Works
### 1. **Dataset Selection**
The command processes datasets that meet these criteria:
- `server_state = 'published'` - Only published datasets
- Has preloaded `xmlCache` relationship for metadata transformation
- Optionally filtered by a specific `publish_id` (see the query sketch below)
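
A hedged Lucid sketch of this selection (the indexing command's source is not part of this commit, so the helper name is illustrative):

```typescript
import Dataset from '#models/dataset';

// Select all published datasets, optionally narrowed to one publish_id.
async function selectDatasets(publishId?: number) {
    const query = Dataset.query()
        .where('server_state', 'published') // only published datasets
        .preload('xmlCache'); // required for the XML metadata transformation
    if (publishId) {
        query.where('publish_id', publishId); // optional --publish_id filter
    }
    return query.exec();
}
```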
### 2. **Smart Update Detection**
For each dataset, the command:
- Checks if the dataset exists in the OpenSearch index
- Compares `server_date_modified` timestamps
- Only re-indexes if the dataset is newer than the indexed version (see the sketch below)
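
A minimal sketch of this check, assuming the `@opensearch-project/opensearch` client and that indexed documents store `server_date_modified` as epoch seconds (both assumptions; the command's source is not included in this commit):

```typescript
import { Client } from '@opensearch-project/opensearch';
import { DateTime } from 'luxon';

const client = new Client({ node: 'http://localhost:9200' }); // assumed dev host

// Returns true when the dataset should be (re-)indexed.
async function needsIndexing(publishId: number, datasetModified: DateTime): Promise<boolean> {
    try {
        const { body: exists } = await client.exists({ index: 'tethys-records', id: String(publishId) });
        if (!exists) return true; // new dataset: index it

        const { body } = await client.get({ index: 'tethys-records', id: String(publishId) });
        const indexedModified = DateTime.fromMillis(Number(body._source.server_date_modified) * 1000);
        return datasetModified > indexedModified; // re-index only when newer
    } catch {
        return true; // if the check fails, index defensively
    }
}
```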
### 3. **Document Processing**
The indexing process involves:
1. **XML Generation**: Creates structured XML from dataset metadata
2. **XSLT Transformation**: Converts XML to JSON using Saxon-JS processor
3. **Index Update**: Updates or creates the document in OpenSearch
4. **Logging**: Records success/failure for each operation
## Index Structure
### Index Configuration
- **Index Name**: `tethys-records`
- **Document ID**: Dataset `publish_id`
- **Refresh**: `true` (immediate availability; see the call sketch below)
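
Continuing the client sketch above, the index update then reduces to a single call; `doc` stands for the JSON document produced by the transformation step described below:

```typescript
// Create or overwrite the search document for one dataset.
await client.index({
    index: 'tethys-records', // index name from the configuration above
    id: String(dataset.publish_id), // document ID = publish_id
    body: doc, // JSON document from the XSLT transformation
    refresh: true, // make the change searchable immediately
});
```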
### Document Fields
The indexed documents contain:
- **Metadata Fields**: Title, description, authors, keywords
- **Identifiers**: DOI, publish_id, and other identifiers
- **Temporal Data**: Publication dates, coverage periods
- **Geographic Data**: Spatial coverage information
- **Technical Details**: Data formats, access information
- **Timestamps**: Creation and modification dates
## Example Output
### Successful Run
```bash
node ace index:datasets
```
```
Found 150 published datasets to process
Dataset with publish_id 231 successfully indexed
Dataset with publish_id 245 is up to date, skipping indexing
Dataset with publish_id 267 successfully indexed
An error occurred while indexing dataset with publish_id 289. Error: Invalid XML metadata
Processing completed: 148 indexed, 1 skipped, 1 error
```
### Specific Dataset
```bash
node ace index:datasets --publish_id 231
```
```
Found 1 published dataset to process
Dataset with publish_id 231 successfully indexed
Processing completed: 1 indexed, 0 skipped, 0 errors
```
## Update Logic
The command uses intelligent indexing to avoid unnecessary processing:
| Condition | Action | Reason |
|-----------|--------|--------|
| Dataset not in index | ✅ Index | New dataset needs indexing |
| Dataset newer than indexed version | ✅ Re-index | Dataset has been updated |
| Dataset same/older than indexed version | ❌ Skip | Already up to date |
| OpenSearch document check fails | ✅ Index | Better safe than sorry |
| Invalid XML metadata | ❌ Skip + Log Error | Cannot process invalid data |
### Timestamp Comparison
```typescript
// Example comparison logic
const existingModified = DateTime.fromMillis(Number(existingDoc.server_date_modified) * 1000);
const currentModified = dataset.server_date_modified;
if (currentModified <= existingModified) {
// Skip - already up to date
return false;
}
// Proceed with indexing
```
## XML Transformation Process
### 1. **XML Generation**
```xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<root>
<Dataset>
<!-- Dataset metadata fields -->
<title>Research Dataset Title</title>
<description>Dataset description...</description>
<!-- Additional metadata -->
</Dataset>
</root>
```
### 2. **XSLT Processing**
The command uses Saxon-JS with a compiled stylesheet (`solr.sef.json`) to transform XML to JSON:
```javascript
const result = await SaxonJS.transform({
stylesheetText: proc,
destination: 'serialized',
sourceText: xmlString,
});
```
### 3. **Final JSON Document**
```json
{
"id": "231",
"title": "Research Dataset Title",
"description": "Dataset description...",
"authors": ["Author Name"],
"server_date_modified": 1634567890,
"publish_id": 231
}
```
## Configuration Requirements
### Environment Variables
```bash
# OpenSearch Configuration
OPENSEARCH_HOST=localhost:9200
# For production:
# OPENSEARCH_HOST=your-opensearch-cluster:9200
```
### Required Files
- **XSLT Stylesheet**: `public/assets2/solr.sef.json` - Compiled Saxon-JS stylesheet for XML transformation
### Database Relationships
The command expects these model relationships:
```typescript
// Dataset model must have:
@hasOne(() => XmlCache, { foreignKey: 'dataset_id' })
public xmlCache: HasOne<typeof XmlCache>
```
## Error Handling
The command handles various error scenarios gracefully:
### Common Errors and Solutions
| Error | Cause | Solution |
|-------|-------|----------|
| `XSLT transformation failed` | Invalid XML or missing stylesheet | Check XML structure and stylesheet path |
| `OpenSearch connection error` | Service unavailable | Verify OpenSearch is running and accessible |
| `JSON parse error` | Malformed transformation result | Check XSLT stylesheet output format |
| `Missing xmlCache relationship` | Data integrity issue | Ensure xmlCache exists for dataset |
### Error Logging
```bash
# Typical error log entry
An error occurred while indexing dataset with publish_id 231.
Error: XSLT transformation failed: Invalid XML structure at line 15
```
## Performance Considerations
### Batch Processing
- Processes datasets sequentially to avoid overwhelming OpenSearch
- Each dataset is committed individually for reliability
- Failed indexing of one dataset doesn't stop processing others
### Resource Usage
- **Memory**: XML/JSON transformations require temporary memory
- **Network**: OpenSearch API calls for each dataset
- **CPU**: XSLT transformations are CPU-intensive
### Optimization Tips
```bash
# Index only recently modified datasets (run regularly)
node ace index:datasets
# Index specific datasets when needed
node ace index:datasets --publish_id 231
# Consider running during off-peak hours for large batches
```
## Integration with Other Systems
### Search Functionality
The indexed documents power:
- **Dataset Search**: Full-text search across metadata
- **Faceted Browsing**: Filter by authors, keywords, dates
- **Geographic Search**: Spatial query capabilities
- **Auto-complete**: Suggest dataset titles and keywords
### Related Commands
- [`update:datacite`](update-datacite.md) - Often run after indexing to sync DOI metadata
- **Database migrations** - May require re-indexing after schema changes
### API Integration
The indexed data is consumed by:
- **Search API**: `/api/search` endpoints
- **Browse API**: `/api/datasets` with filtering
- **Recommendations**: Related dataset suggestions
## Monitoring and Maintenance
### Regular Tasks
```bash
# Daily indexing (recommended cron job)
0 2 * * * cd /path/to/project && node ace index:datasets
# Weekly catch-up run (if needed; the command skips datasets that are already up to date)
0 3 * * 0 cd /path/to/project && node ace index:datasets
```
### Health Checks
- Monitor OpenSearch cluster health
- Check for failed indexing operations in logs
- Verify search functionality is working
- Compare dataset counts between database and index
### Troubleshooting
```bash
# Check specific dataset indexing
node ace index:datasets --publish_id 231
# Verify OpenSearch connectivity
curl -X GET "localhost:9200/_cluster/health"
# Check index statistics
curl -X GET "localhost:9200/tethys-records/_stats"
```
## Best Practices
1. **Regular Scheduling**: Run the command regularly (daily) to keep the search index current
2. **Monitor Logs**: Watch for transformation errors or OpenSearch issues
3. **Backup Strategy**: Include OpenSearch indices in backup procedures
4. **Resource Management**: Monitor OpenSearch cluster resources during bulk operations
5. **Testing**: Verify search functionality after major indexing operations
6. **Coordination**: Run indexing before DataCite updates when both are needed

docs/commands/update-datacite.md Normal file
@@ -0,0 +1,216 @@
# DataCite Update Command
AdonisJS Ace command for updating DataCite DOI records for published datasets.
## Overview
The `update:datacite` command synchronizes your local dataset metadata with DataCite DOI records. It intelligently compares modification dates to only update records when necessary, reducing unnecessary API calls and maintaining data consistency.
## Command Syntax
```bash
node ace update:datacite [options]
```
## Options
| Flag | Alias | Description |
|------|-------|-------------|
| `--publish_id <number>` | `-p` | Update a specific dataset by publish_id |
| `--force` | `-f` | Force update all records regardless of modification date |
| `--dry-run` | `-d` | Preview what would be updated without making changes |
| `--stats` | `-s` | Show detailed statistics for datasets that need updating |
## Usage Examples
### Basic Operations
```bash
# Update all datasets that have been modified since their DOI was last updated
node ace update:datacite
# Update a specific dataset
node ace update:datacite --publish_id 231
node ace update:datacite -p 231
# Force update all datasets with DOIs (ignores modification dates)
node ace update:datacite --force
```
### Preview and Analysis
```bash
# Preview what would be updated (dry run)
node ace update:datacite --dry-run
# Show detailed statistics for datasets that need updating
node ace update:datacite --stats
# Show stats for a specific dataset
node ace update:datacite --stats --publish_id 231
```
### Combined Options
```bash
# Dry run for a specific dataset
node ace update:datacite --dry-run --publish_id 231
# Show stats for all datasets (including up-to-date ones)
node ace update:datacite --stats --force
```
## Command Modes
### 1. **Normal Mode** (Default)
Updates DataCite records for datasets that have been modified since their DOI was last updated.
**Example Output:**
```
Using DataCite API: https://api.test.datacite.org
Found 50 datasets to process
Dataset 231: Successfully updated DataCite record
Dataset 245: Up to date, skipping
Dataset 267: Successfully updated DataCite record
DataCite update completed. Updated: 15, Skipped: 35, Errors: 0
```
### 2. **Dry Run Mode** (`--dry-run`)
Shows what would be updated without making any changes to DataCite.
**Use Case:** Preview updates before running the actual command.
**Example Output:**
```
Dataset 231: Would update DataCite record (dry run)
Dataset 267: Would update DataCite record (dry run)
Dataset 245: Up to date, skipping
DataCite update completed. Updated: 2, Skipped: 1, Errors: 0
```
### 3. **Stats Mode** (`--stats`)
Shows detailed information for each dataset that needs updating, including why it needs updating.
**Use Case:** Debug synchronization issues, monitor dataset/DOI status, generate reports.
**Example Output:**
```
┌─ Dataset 231 ─────────────────────────────────────────────────────────
│ DOI Value: 10.21388/tethys.231
│ DOI Status (DB): findable
│ DOI State (DataCite): findable
│ Dataset Modified: 2024-09-15T10:30:00.000Z
│ DOI Modified: 2024-09-10T08:15:00.000Z
│ Needs Update: YES - Dataset newer than DOI
└───────────────────────────────────────────────────────────────────────
┌─ Dataset 267 ─────────────────────────────────────────────────────────
│ DOI Value: 10.21388/tethys.267
│ DOI Status (DB): findable
│ DOI State (DataCite): findable
│ Dataset Modified: 2024-09-18T14:20:00.000Z
│ DOI Modified: 2024-09-16T12:45:00.000Z
│ Needs Update: YES - Dataset newer than DOI
└───────────────────────────────────────────────────────────────────────
DataCite Stats Summary: 2 datasets need updating, 48 are up to date
```
## Update Logic
The command uses intelligent update detection (a condensed code sketch follows the table below):
1. **Compares modification dates**: Dataset `server_date_modified` vs DOI last modification date from DataCite
2. **Validates data integrity**: Checks for missing or future dates
3. **Handles API failures gracefully**: Updates anyway if DataCite info can't be retrieved
4. **Uses dual API approach**: DataCite REST API (primary) with MDS API fallback
### When Updates Happen
| Condition | Action | Reason |
|-----------|--------|--------|
| Dataset modified > DOI modified | ✅ Update | Dataset has newer changes |
| Dataset modified ≤ DOI modified | ❌ Skip | DOI is up to date |
| Dataset date in future | ❌ Skip | Invalid data, needs investigation |
| Dataset date missing | ✅ Update | Can't determine staleness |
| DataCite API error | ✅ Update | Better safe than sorry |
| `--force` flag used | ✅ Update | Override all logic |
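
Condensed from the `shouldUpdateDataset` implementation in `commands/update_datacite.ts` (part of this commit), the decision logic is roughly:

```typescript
import { DateTime } from 'luxon';
import { DoiClient } from '#app/Library/Doi/DoiClient';

// Decide whether a dataset's DataCite record needs an update.
async function shouldUpdate(dataset: { server_date_modified: DateTime | null }, doiValue: string, force: boolean): Promise<boolean> {
    if (force) return true; // --force overrides all checks
    const modified = dataset.server_date_modified;
    if (!modified) return true; // missing date: staleness unknown, update anyway
    if (modified > DateTime.now()) return false; // future date: data integrity issue, skip
    const doiModified = await new DoiClient().getDoiLastModified(doiValue);
    if (!doiModified) return true; // DataCite info unavailable: update anyway
    return modified > DateTime.fromJSDate(doiModified); // update only if the dataset is newer
}
```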
## Environment Configuration
Required environment variables:
```bash
# DataCite Credentials
DATACITE_USERNAME=your_username
DATACITE_PASSWORD=your_password
# API Endpoints (environment-specific)
DATACITE_API_URL=https://api.test.datacite.org # Test environment
DATACITE_SERVICE_URL=https://mds.test.datacite.org # Test MDS
# For production:
# DATACITE_API_URL=https://api.datacite.org
# DATACITE_SERVICE_URL=https://mds.datacite.org
# Project Configuration
DATACITE_PREFIX=10.21388 # Your DOI prefix
BASE_DOMAIN=tethys.at # Your domain
```
## Error Handling
The command handles various error scenarios:
- **Invalid modification dates**: Logs errors but continues processing other datasets
- **DataCite API failures**: Falls back to MDS API, then to safe update
- **Missing DOI identifiers**: Skips datasets without DOI identifiers
- **Network issues**: Continues with next dataset after logging error
## Integration
The command integrates with:
- **Dataset Model**: Uses `server_date_modified` for change detection
- **DatasetIdentifier Model**: Reads DOI values and status
- **OpenSearch Index**: Run `index:datasets` after DataCite updates to keep the search index in sync
- **DoiClient**: Handles all DataCite API interactions
## Common Workflows
### Daily Maintenance
```bash
# Update any datasets modified today
node ace update:datacite
```
### Pre-Deployment Check
```bash
# Check what would be updated before deployment
node ace update:datacite --dry-run
```
### Debugging Sync Issues
```bash
# Investigate why specific dataset isn't syncing
node ace update:datacite --stats --publish_id 231
```
### Full Resync
```bash
# Force update all DOI records (use with caution)
node ace update:datacite --force
```
### Monitoring Report
```bash
# Generate sync status report
node ace update:datacite --stats > datacite-sync-report.txt
```
## Best Practices
1. **Regular Updates**: Run daily or after bulk dataset modifications
2. **Test First**: Use `--dry-run` or `--stats` before bulk operations
3. **Monitor Logs**: Check for data integrity warnings
4. **Environment Separation**: Use correct API URLs for test vs production
5. **Rate Limiting**: Datasets are processed sequentially, which keeps the request rate to DataCite modest

package-lock.json (generated; diff suppressed because it is too large)

readme.md
@@ -11,6 +11,8 @@ Welcome to the Tethys Research Repository Backend System! This is the backend co
- [Configuration](#configuration)
- [Database](#database)
- [API Documentation](#api-documentation)
- [Commands](#commands)
- [Documentation](#documentation)
- [Contributing](#contributing)
- [License](#license)
@@ -29,5 +31,175 @@ Before you begin, ensure you have met the following requirements:
1. Clone this repository:
```bash
git clone https://gitea.geologie.ac.at/geolba/tethys.backend.git
cd tethys-backend
```
2. Install dependencies:
```bash
npm install
```
3. Configure environment variables (see [Configuration](#configuration))
4. Run database migrations:
```bash
node ace migration:run
```
5. Start the development server:
```bash
npm run dev
```
## Usage
The Tethys Backend provides RESTful APIs for managing research datasets, user authentication, DOI registration, and search functionality.
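For example, listing published datasets and resolving one record by DOI against a local development server (a sketch; host and port are assumptions based on the AdonisJS default, and the example DOI is taken from the controller's docblock):
```typescript
const base = 'http://localhost:3333'; // assumed dev server address

// List published datasets (GET /api/datasets).
const datasets = await fetch(`${base}/api/datasets`).then((res) => res.json());
console.log(`${datasets.length} datasets`);

// Resolve a dataset by DOI (GET /api/dataset/:prefix/:value, added in this commit).
const dataset = await fetch(`${base}/api/dataset/10.24341/tethys.99.2`).then((res) => res.json());
console.log(dataset.titles);
```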
## Configuration
Copy the `.env.example` file to `.env` and configure the following variables:
### Database Configuration
```bash
DB_CONNECTION=pg
DB_HOST=localhost
DB_PORT=5432
DB_USER=your_username
DB_PASSWORD=your_password
DB_DATABASE=tethys_db
```
### DataCite Configuration
```bash
# DataCite Credentials
DATACITE_USERNAME=your_datacite_username
DATACITE_PASSWORD=your_datacite_password
DATACITE_PREFIX=10.21388
# Environment-specific API endpoints
DATACITE_API_URL=https://api.test.datacite.org # Test environment
DATACITE_SERVICE_URL=https://mds.test.datacite.org # Test MDS
# For production:
# DATACITE_API_URL=https://api.datacite.org
# DATACITE_SERVICE_URL=https://mds.datacite.org
```
### OpenSearch Configuration
```bash
OPENSEARCH_HOST=localhost:9200
```
### Application Configuration
```bash
BASE_DOMAIN=tethys.at
APP_KEY=your_app_key
```
## Database
The system uses PostgreSQL with Lucid ORM. Key models include:
- **Dataset**: Research dataset metadata
- **DatasetIdentifier**: DOI and other identifiers for datasets
- **User**: User management and authentication
- **XmlCache**: Cached XML metadata
Run migrations and seeders:
```bash
# Run migrations
node ace migration:run
# Run seeders (if available)
node ace db:seed
```
## API Documentation
API endpoints are available for:
- Dataset management (`/api/datasets`)
- User authentication (`/api/auth`)
- DOI registration (`/api/doi`)
- Search functionality (`/api/search`)
*Detailed API documentation can be found in the `/docs/api` directory.*
## Commands
The system includes several Ace commands for maintenance and data management:
### Dataset Indexing
```bash
# Index all published datasets to OpenSearch
node ace index:datasets
# Index a specific dataset
node ace index:datasets --publish_id 123
```
### DataCite DOI Management
```bash
# Update DataCite records for modified datasets
node ace update:datacite
# Show detailed statistics for datasets needing updates
node ace update:datacite --stats
# Preview what would be updated (dry run)
node ace update:datacite --dry-run
# Force update all DOI records
node ace update:datacite --force
# Update a specific dataset
node ace update:datacite --publish_id 123
```
*For detailed command documentation, see the [Commands Documentation](docs/commands/)*
## Documentation
Comprehensive documentation is available in the `/docs` directory:
- **[Commands Documentation](docs/commands/)** - Detailed guides for Ace commands
- [DataCite Update Command](docs/commands/update-datacite.md) - DOI synchronization and management
- [Dataset Indexing Command](docs/commands/index-datasets.md) - Search index management
- **[API Documentation](docs/api/)** - REST API endpoints and usage
- **[Deployment Guide](docs/deployment/)** - Production deployment instructions
- **[Configuration Guide](docs/configuration/)** - Environment setup and configuration options
## Contributing
1. Fork the repository
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
4. Push to the branch (`git push origin feature/amazing-feature`)
5. Open a Pull Request
### Development Guidelines
- Follow the existing code style and conventions
- Write tests for new features
- Update documentation for any API changes
- Ensure all commands and migrations work properly
### Testing Commands
```bash
# Run tests
npm test
# Test specific commands
node ace update:datacite --dry-run --publish_id 123
node ace index:datasets --publish_id 123
```
## License
This project is licensed under the [MIT License](LICENSE).

start/routes/api.ts
@@ -8,14 +8,24 @@ import AvatarController from '#controllers/Http/Api/AvatarController';
import UserController from '#controllers/Http/Api/UserController';
import CollectionsController from '#controllers/Http/Api/collections_controller';
import { middleware } from '../kernel.js';

// Clean DOI URL routes (no /api prefix)
// API routes with /api prefix
router
    .group(() => {
        router.get('clients', [UserController, 'getSubmitters']).as('client.index').use(middleware.auth());
        router.get('authors', [AuthorsController, 'index']).as('author.index').use(middleware.auth());
        router.get('datasets', [DatasetController, 'index']).as('dataset.index');
        router.get('persons', [AuthorsController, 'persons']).as('author.persons');
        // This should come BEFORE any other routes that might conflict
        router
            .get('/dataset/:prefix/:value', [DatasetController, 'findByIdentifier'])
            .where('prefix', /^10\.\d+$/) // Match DOI prefix pattern (10.xxxx)
            .where('value', /^[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/) // Match DOI suffix pattern
            .as('dataset.findByIdentifier');
        router.get('/dataset', [DatasetController, 'findAll']).as('dataset.findAll');
        router.get('/dataset/:publish_id', [DatasetController, 'findOne']).as('dataset.findOne');
        router.get('/sitelinks/:year', [HomeController, 'findDocumentsPerYear']);
@@ -35,7 +45,7 @@ router
            .as('apps.twofactor_backupcodes.create')
            .use(middleware.auth());

        router.get('collections/:id', [CollectionsController, 'show']).as('collection.show');
    })
    // .namespace('App/Controllers/Http/Api')
    .prefix('api');

start/rules/orcid.ts
@@ -1,7 +1,7 @@
/*
|--------------------------------------------------------------------------
| Do you want to register the preload file in .adonisrc.ts file? (y/N) · true
| DONE: create start/rules/orcid.ts
| DONE: update adonisrc.ts file
|--------------------------------------------------------------------------