/*
|--------------------------------------------------------------------------
| node ace make:command update-datacite
| DONE: create commands/update_datacite.ts
|--------------------------------------------------------------------------
*/
import { BaseCommand, flags } from '@adonisjs/core/ace';
import { CommandOptions } from '@adonisjs/core/types/ace';
import Dataset from '#models/dataset';
import { DoiClient } from '#app/Library/Doi/DoiClient';
import DoiClientException from '#app/exceptions/DoiClientException';
import Index from '#app/Library/Utils/Index';
import env from '#start/env';
import logger from '@adonisjs/core/services/logger';
import { DateTime } from 'luxon';
import { getDomain } from '#app/utils/utility-functions';

/**
 * A dataset counts as "newer than its DOI record" only when its modification
 * timestamp exceeds the DataCite timestamp by more than this many seconds.
 * This prevents unnecessary updates for minor timestamp differences.
 */
const TIMESTAMP_TOLERANCE_SECONDS = 60;

/** Horizontal rule that follows the dataset id in the header of a stats box. */
const STATS_BOX_HEADER_RULE = '───────────────────────────────────────────────────────────────';

/** Closing line of a stats box. */
const STATS_BOX_FOOTER = '└─────────────────────────────────────────────────────────────────────────────────────────────';

/**
 * Extracts a printable message from a caught value. Catch variables are
 * `unknown` under strict mode, so `.message` must not be read blindly.
 */
function describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
}

/**
 * Renders the framed, multi-line block printed in stats mode: a leading blank
 * line, a header naming the dataset, one `│`-prefixed line per row, and a footer.
 */
function formatStatsBox(publishId: Dataset['publish_id'], rows: string[]): string {
    return ['', `┌─ Dataset ${publishId} ${STATS_BOX_HEADER_RULE}`, ...rows.map((row) => `│ ${row}`), STATS_BOX_FOOTER].join('\n');
}

/**
 * Ace command `update:datacite`.
 *
 * Selects published datasets that have a DOI identifier and, for each one whose
 * local modification date is newer than the modification date reported by
 * DataCite (or for all of them with `--force`), re-registers the DOI metadata.
 * `--dry-run` only reports what would be updated; `--stats` prints a detailed
 * box per dataset that needs updating and performs no update.
 */
export default class UpdateDatacite extends BaseCommand {
    static commandName = 'update:datacite';
    static description = 'Update DataCite DOI records for published datasets';

    // NOTE(review): appears to duplicate `options.startApp` below — confirm whether it is still needed.
    public static needsApplication = true;

    // Optional: the flag is only set when the user passes -p / --publish-id.
    @flags.number({ alias: 'p', description: 'Specific publish_id to update' })
    public publish_id?: number;

    @flags.boolean({ alias: 'f', description: 'Force update all records regardless of modification date' })
    public force: boolean = false;

    @flags.boolean({ alias: 'd', description: 'Dry run - show what would be updated without making changes' })
    public dryRun: boolean = false;

    @flags.boolean({ alias: 's', description: 'Show detailed stats for each dataset that needs updating' })
    public stats: boolean = false;

    // example: node ace update:datacite -p 123 --force --dry-run
    public static options: CommandOptions = {
        startApp: true, // Whether to boot the application before running the command
        stayAlive: false, // Whether to keep the process alive after the command has executed
    };

    /**
     * Entry point of the command.
     *
     * Validates the required environment variables, loads the candidate
     * datasets and processes them one by one. A failure for one dataset is
     * logged and counted but does not stop the remaining datasets.
     *
     * Sets `exitCode` to 1 when the configuration is incomplete or when at
     * least one dataset failed, so that callers (cron, CI) can detect the failure.
     */
    async run(): Promise<void> {
        logger.info('Starting DataCite update process...');

        const prefix = env.get('DATACITE_PREFIX', '');
        const base_domain = env.get('BASE_DOMAIN', '');
        const apiUrl = env.get('DATACITE_API_URL', 'https://api.datacite.org');

        if (!prefix || !base_domain) {
            logger.error('Missing DATACITE_PREFIX or BASE_DOMAIN environment variables');
            this.exitCode = 1;
            return;
        }

        logger.info(`Using DataCite API: ${apiUrl}`);

        const datasets = await this.getDatasets();
        logger.info(`Found ${datasets.length} datasets to process`);

        // In stats and dry-run mode `updated` counts datasets that WOULD be updated.
        let updated = 0;
        let skipped = 0;
        let errors = 0;

        for (const dataset of datasets) {
            try {
                // --force short-circuits the (remote) freshness check.
                const shouldUpdate = this.force || (await this.shouldUpdateDataset(dataset));

                if (this.stats) {
                    // Stats mode: show detailed information for datasets that need updating
                    if (shouldUpdate) {
                        await this.showDatasetStats(dataset);
                        updated++;
                    } else {
                        skipped++;
                    }
                    continue;
                }

                if (!shouldUpdate) {
                    logger.info(`Dataset ${dataset.publish_id}: Up to date, skipping`);
                    skipped++;
                    continue;
                }

                if (this.dryRun) {
                    logger.info(`Dataset ${dataset.publish_id}: Would update DataCite record (dry run)`);
                    updated++;
                    continue;
                }

                await this.updateDataciteRecord(dataset, prefix, base_domain);
                logger.info(`Dataset ${dataset.publish_id}: Successfully updated DataCite record`);
                updated++;
            } catch (error) {
                logger.error(`Dataset ${dataset.publish_id}: Failed to update - ${describeError(error)}`);
                errors++;
            }
        }

        if (this.stats) {
            logger.info(`\nDataCite Stats Summary: ${updated} datasets need updating, ${skipped} are up to date`);
        } else {
            logger.info(`DataCite update completed. Updated: ${updated}, Skipped: ${skipped}, Errors: ${errors}`);
        }

        if (errors > 0) {
            this.exitCode = 1;
        }
    }

    /**
     * Loads the datasets to process: those with `server_state = 'published'`
     * that have an identifier of type `doi`, with the `identifier` and
     * `xmlCache` relations preloaded. When `publish_id` was given, the result
     * is restricted to that dataset.
     */
    private async getDatasets(): Promise<Dataset[]> {
        const query = Dataset.query()
            .preload('identifier')
            .preload('xmlCache')
            .where('server_state', 'published')
            .whereHas('identifier', (identifierQuery) => {
                identifierQuery.where('type', 'doi');
            });

        if (this.publish_id) {
            query.where('publish_id', this.publish_id);
        }

        return await query.exec();
    }

    /**
     * Returns the dataset's `identifier` relation, loading it first when it is
     * not yet present on the model instance.
     */
    private async resolveDoiIdentifier(dataset: Dataset): Promise<Dataset['identifier'] | undefined> {
        if (!dataset.identifier) {
            await dataset.load('identifier');
        }
        return dataset.identifier;
    }

    /**
     * Decides whether the DataCite record of a dataset is out of date.
     *
     * @returns `true` when the dataset has no modification date, or when its
     *   modification date is more than {@link TIMESTAMP_TOLERANCE_SECONDS}
     *   seconds newer than the DOI modification date reported by DataCite.
     *   `false` when the dataset has no DOI identifier, its modification date
     *   lies in the future, DataCite returns no modification date, or the check
     *   itself fails (the failure is logged as a warning, never thrown).
     */
    private async shouldUpdateDataset(dataset: Dataset): Promise<boolean> {
        try {
            const doiIdentifier = await this.resolveDoiIdentifier(dataset);
            if (!doiIdentifier || doiIdentifier.type !== 'doi') {
                return false;
            }

            const datasetModified = dataset.server_date_modified;
            if (!datasetModified) {
                return true; // Update if modification date is missing
            }
            if (datasetModified > DateTime.now()) {
                return false; // Skip invalid future dates
            }

            // Check DataCite DOI modification date
            const doiClient = new DoiClient();
            const doiLastModified = await doiClient.getDoiLastModified(doiIdentifier.value);
            if (!doiLastModified) {
                return false; // Do not update if we can't get DOI info
            }

            // A signed difference above the (positive) tolerance also implies that the
            // dataset is newer than the DOI record, so no separate comparison is needed.
            const doiModified = DateTime.fromJSDate(doiLastModified);
            const secondsNewer = datasetModified.diff(doiModified, 'seconds').seconds;
            return secondsNewer > TIMESTAMP_TOLERANCE_SECONDS;
        } catch (error) {
            // Best effort: never update when the status cannot be determined, but make
            // the reason visible instead of silently reporting the dataset as up to date.
            logger.warn(`Dataset ${dataset.publish_id}: Could not determine update status - ${describeError(error)}`);
            return false;
        }
    }

    /**
     * Regenerates the DOI registration XML for a dataset and registers it at
     * DataCite under the dataset's existing DOI value.
     *
     * @param dataset - Dataset whose DataCite record is updated.
     * @param prefix - DataCite prefix; currently not used here because the
     *   existing DOI value of the dataset is reused.
     * @param base_domain - Base domain used to build the landing page URL.
     * @throws DoiClientException when DataCite answers with a status other than 201
     *   (or when the client itself throws one).
     * @throws Error for every other failure, prefixed with
     *   "Failed to update DataCite record: ".
     */
    private async updateDataciteRecord(dataset: Dataset, prefix: string, base_domain: string): Promise<void> {
        try {
            // Get the DOI identifier (HasOne relationship)
            const doiIdentifier = await this.resolveDoiIdentifier(dataset);
            if (!doiIdentifier || doiIdentifier.type !== 'doi') {
                throw new Error('No DOI identifier found for dataset');
            }

            // Generate XML metadata
            const xmlMeta = (await Index.getDoiRegisterString(dataset)) as string;
            if (!xmlMeta) {
                throw new Error('Failed to generate XML metadata');
            }

            // Construct DOI value and landing page URL
            const doiValue = doiIdentifier.value; // Use existing DOI value
            const landingPageUrl = `https://doi.${getDomain(base_domain)}/${doiValue}`;

            // Update DataCite record
            const doiClient = new DoiClient();
            const dataciteResponse = await doiClient.registerDoi(doiValue, xmlMeta, landingPageUrl);

            if (dataciteResponse?.status !== 201) {
                throw new DoiClientException(
                    dataciteResponse?.status || 500,
                    `Unexpected DataCite response code: ${dataciteResponse?.status}`,
                );
            }

            // Currently disabled follow-up steps:
            // // Update dataset modification date
            // dataset.server_date_modified = DateTime.now();
            // await dataset.save();
            // // Update search index
            // const index_name = 'tethys-records';
            // await Index.indexDocument(dataset, index_name);

            logger.debug(`Dataset ${dataset.publish_id}: DataCite record updated successfully`);
        } catch (error) {
            if (error instanceof DoiClientException) {
                throw error;
            }
            throw new Error(`Failed to update DataCite record: ${describeError(error)}`);
        }
    }

    /**
     * Shows detailed statistics for a dataset that needs updating.
     *
     * Prints a framed block to stdout with the DOI value, the DOI status stored
     * locally, the DOI state and modification date reported by DataCite, and the
     * dataset's modification date. If gathering that information fails, a
     * reduced block containing the error message is printed instead; this
     * method does not throw.
     */
    private async showDatasetStats(dataset: Dataset): Promise<void> {
        try {
            const doiIdentifier = await this.resolveDoiIdentifier(dataset);

            const doiValue = doiIdentifier?.value || 'N/A';
            const doiStatus = doiIdentifier?.status || 'N/A';
            const datasetModified = dataset.server_date_modified;

            // Get DOI info from DataCite; the two lookups are independent of each other.
            const doiClient = new DoiClient();
            const [doiLastModified, doiState] = await Promise.all([
                doiClient.getDoiLastModified(doiValue),
                doiClient.getDoiState(doiValue),
            ]);

            console.log(
                formatStatsBox(dataset.publish_id, [
                    `DOI Value: ${doiValue}`,
                    `DOI Status (DB): ${doiStatus}`,
                    `DOI State (DataCite): ${doiState || 'Unknown'}`,
                    `Dataset Modified: ${datasetModified ? datasetModified.toISO() : 'N/A'}`,
                    `DOI Modified: ${doiLastModified ? DateTime.fromJSDate(doiLastModified).toISO() : 'N/A'}`,
                    'Needs Update: YES - Dataset newer than DOI',
                ]),
            );
        } catch (error) {
            console.log(
                formatStatsBox(dataset.publish_id, [
                    `DOI Value: ${dataset.identifier?.value || 'N/A'}`,
                    `Error: ${describeError(error)}`,
                    'Needs Update: YES - Error checking status',
                ]),
            );
        }
    }
}