/*
|--------------------------------------------------------------------------
| node ace make:command fix-version-related-ids
| DONE: create commands/fix_version_related_ids.ts
|--------------------------------------------------------------------------
| Repairs the `related_document_id` foreign key on version references
| (IsNewVersionOf / IsPreviousVersionOf, both directions).
|
| The DOI stored in `value` is the reliable link; `related_document_id`
| is frequently NULL or self-referential. This command resolves the target
| dataset via its DOI and sets `related_document_id` accordingly, correcting
| both NULL and wrong-but-non-null values.
|
| Examples:
|   node ace fix:version-related-ids              // dry run, all datasets
|   node ace fix:version-related-ids --verbose    // dry run with per-row detail
|   node ace fix:version-related-ids --fix        // apply changes
|   node ace fix:version-related-ids --fix -p 226 // apply, only refs owned by publish_id 226
*/
import { BaseCommand, flags } from '@adonisjs/core/ace';
import type { CommandOptions } from '@adonisjs/core/types/ace';
import Dataset from '#models/dataset';
import DatasetReference from '#models/dataset_reference';

/**
 * Ace command that backfills / repairs `related_document_id` on version
 * references by resolving the referenced dataset from the reference `value`.
 *
 * Runs as a dry run unless `--fix` is passed.
 */
export default class FixVersionRelatedIds extends BaseCommand {
    static commandName = 'fix:version-related-ids';
    static description =
        'Backfill/repair related_document_id on IsNewVersionOf / IsPreviousVersionOf references by resolving the target dataset via its DOI';

    public static needsApplication = true;

    @flags.boolean({ alias: 'f', description: 'Apply changes. Without this flag the command runs as a dry run.' })
    public fix: boolean = false;

    @flags.boolean({ alias: 'v', description: 'Verbose output (per-reference detail)' })
    public verbose: boolean = false;

    @flags.number({ alias: 'p', description: 'Only process references owned by this publish_id' })
    public publish_id?: number;

    public static options: CommandOptions = {
        startApp: true,
        staysAlive: false,
    };

    // Only the version relations, both directions.
    private readonly VERSION_RELATIONS = ['IsNewVersionOf', 'IsPreviousVersionOf'];

    /**
     * Scan all matching version references, resolve each one's target dataset
     * and (with `--fix`) write the resolved id into `related_document_id`.
     * Prints a summary of what was / would be changed. On failure the error is
     * logged and the command's exit code is set to 1.
     */
    async run() {
        this.logger.info(`🔍 Scanning ${this.VERSION_RELATIONS.join(' / ')} references...`);
        this.logger.info(this.fix ? '✏️ Mode: APPLY (changes will be written)' : '👀 Mode: DRY RUN (no changes written)');

        // Narrow once into a local so the closure below needs no type assertion.
        const ownerPublishId = this.publish_id;
        if (typeof ownerPublishId === 'number') {
            this.logger.info(`🎯 Filtering by owning publish_id: ${ownerPublishId}`);
        }

        try {
            const query = DatasetReference.query()
                .whereIn('relation', this.VERSION_RELATIONS)
                .whereIn('type', ['DOI', 'URL'])
                .where((q) => {
                    q.where('value', 'like', '%doi.org/10.24341/tethys.%').orWhere('value', 'like', '%tethys.at/dataset/%');
                });

            // Restrict to references owned by a specific dataset (by publish_id), if requested.
            if (typeof ownerPublishId === 'number') {
                query.whereHas('dataset', (d) => d.where('publish_id', ownerPublishId));
            }

            const refs = await query.exec();
            this.logger.info(`🔗 Found ${refs.length} version reference(s) to inspect`);

            let alreadyCorrect = 0;
            let filledFromNull = 0;
            let correctedWrong = 0;
            let unresolved = 0;

            for (const ref of refs) {
                const target = await this.resolveTarget(ref);

                if (!target) {
                    unresolved++;
                    if (this.verbose) {
                        this.logger.warning(`⚠️ Reference ${ref.id}: could not resolve target (value: ${ref.value})`);
                    }
                    continue;
                }

                // Never let a reference point at its own owning document.
                if (target.id === ref.document_id) {
                    unresolved++;
                    if (this.verbose) {
                        this.logger.warning(
                            `⚠️ Reference ${ref.id}: target resolves to its own document (${ref.document_id}); skipping self-link`,
                        );
                    }
                    continue;
                }

                if (ref.related_document_id === target.id) {
                    alreadyCorrect++;
                    continue;
                }

                // Remember the old value before (possibly) overwriting it, for counters and logging.
                const previous = ref.related_document_id;
                const wasNull = previous === null || previous === undefined;

                if (this.fix) {
                    ref.related_document_id = target.id;
                    await ref.save();
                }

                if (wasNull) {
                    filledFromNull++;
                } else {
                    correctedWrong++;
                }

                if (this.verbose) {
                    const action = this.fix ? 'Updated' : '📝 Would update';
                    this.logger.info(
                        `${action} reference ${ref.id} (doc ${ref.document_id}, ${ref.relation}): ` +
                            `related_document_id ${previous ?? 'NULL'} → ${target.id} (publish_id ${target.publish_id})`,
                    );
                }
            }

            this.logger.info('────────────────────────────────────────');
            this.logger.info(`✔️ Already correct: ${alreadyCorrect}`);
            this.logger.info(`➕ Filled from NULL: ${filledFromNull}`);
            this.logger.info(`🔧 Corrected wrong value: ${correctedWrong}`);
            this.logger.info(`⚠️ Unresolved/skipped: ${unresolved}`);
            this.logger.info('────────────────────────────────────────');

            const changes = filledFromNull + correctedWrong;
            if (!this.fix && changes > 0) {
                this.logger.info(`💡 Dry run only. Re-run with --fix to write ${changes} change(s).`);
            } else if (this.fix) {
                this.logger.success(`Done. ${changes} reference(s) updated.`);
            } else {
                this.logger.success('Nothing to change — all version references already linked correctly.');
            }
        } catch (error) {
            // The logger's second argument is an options object, not extra data to print,
            // so the error text has to be part of the message itself.
            const detail = error instanceof Error ? error.message : String(error);
            this.logger.error(`Error fixing version related_document_id values: ${detail}`);
            if (this.verbose && error instanceof Error && error.stack) {
                this.logger.error(error.stack);
            }
            // Signal failure through the command's exit code instead of killing the
            // process, so the normal command/application shutdown still runs.
            this.exitCode = 1;
        }
    }

    /**
     * Resolve the dataset a version reference points to.
     * Prefers the DOI in `value` (reliable); falls back to a tethys publish_id URL.
     *
     * @param ref - The version reference whose `value` should be resolved.
     * @returns The matching dataset, or `null` when neither lookup finds one.
     */
    private async resolveTarget(ref: DatasetReference): Promise<Dataset | null> {
        const doi = this.normalizeDoi(ref.value);
        if (doi) {
            const byDoi = await Dataset.query()
                .whereHas('identifier', (q) => q.where('value', doi))
                .first();
            if (byDoi) return byDoi;
        }

        const publishId = this.extractPublishId(ref.value);
        if (publishId !== null) {
            const byPublishId = await Dataset.query().where('publish_id', publishId).first();
            if (byPublishId) return byPublishId;
        }

        return null;
    }

    /**
     * Strip the resolver prefix so a reference value like
     * "https://doi.org/10.24341/tethys.108.2" matches the identifier
     * table value "10.24341/tethys.108.2". Returns null if it isn't a DOI.
     *
     * The scheme is optional and a `www.` / `dx.` host prefix is accepted,
     * because the reference query selects any value containing `doi.org/10...`,
     * not only fully-qualified https URLs.
     *
     * @param value - Raw reference value (DOI, DOI URL, or something else).
     * @returns The bare DOI, or `null` when `value` is empty or not a DOI.
     */
    private normalizeDoi(value: string | null): string | null {
        if (!value) return null;

        const cleaned = value
            .trim()
            .replace(/^(?:https?:\/\/)?(?:(?:dx|www)\.)?doi\.org\//i, '')
            .replace(/^doi:\s*/i, '');

        return /^10\.\d{4,}\//.test(cleaned) ? cleaned : null;
    }

    /**
     * Extract the numeric id from a "tethys.at/dataset/<digits>" URL.
     *
     * @param value - Raw reference value.
     * @returns The parsed id, or `null` when `value` is empty or has no such URL part.
     */
    private extractPublishId(value: string | null): number | null {
        if (!value) return null;

        const digits = value.match(/tethys\.at\/dataset\/(\d+)/)?.[1];
        return digits ? Number.parseInt(digits, 10) : null;
    }
}