fix: repair related_document_id for version references
Add `fix:version-related-ids` ace command to backfill and correct related_document_id on IsNewVersionOf / IsPreviousVersionOf references, resolving the target dataset via its DOI. Handles both NULL and self-referential (wrong) values that the existing detect command could not repair. Make the dataset version-chain API DOI-based: resolve previous/newer versions through the DOI in the reference value instead of the unreliable related_document_id, so the chain is correct regardless of the stored FK.
This commit is contained in:
parent
9368a0dd8d
commit
9c0221ce27
2 changed files with 347 additions and 69 deletions
|
|
@ -210,13 +210,13 @@ export default class DatasetController {
|
|||
*/
|
||||
private async buildVersionChain(dataset: Dataset) {
|
||||
const versionChain = {
|
||||
current: {
|
||||
id: dataset.id,
|
||||
publish_id: dataset.publish_id,
|
||||
doi: dataset.identifier?.value || null,
|
||||
main_title: dataset.mainTitle || null,
|
||||
server_date_published: dataset.server_date_published,
|
||||
},
|
||||
// current: {
|
||||
// id: dataset.id,
|
||||
// publish_id: dataset.publish_id,
|
||||
// doi: dataset.identifier?.value || null,
|
||||
// main_title: dataset.mainTitle || null,
|
||||
// server_date_published: dataset.server_date_published,
|
||||
// },
|
||||
previousVersions: [] as any[],
|
||||
newerVersions: [] as any[],
|
||||
};
|
||||
|
|
@ -233,92 +233,181 @@ export default class DatasetController {
|
|||
/**
|
||||
* Recursively get all previous versions
|
||||
*/
|
||||
// private async getPreviousVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
|
||||
// // Prevent infinite loops
|
||||
// if (visited.has(datasetId)) {
|
||||
// return [];
|
||||
// }
|
||||
// visited.add(datasetId);
|
||||
|
||||
// const previousVersions: any[] = [];
|
||||
|
||||
// // Find references where this dataset "IsNewVersionOf" another dataset
|
||||
// const previousRefs = await DatasetReference.query()
|
||||
// .where('document_id', datasetId)
|
||||
// .where('relation', 'IsNewVersionOf')
|
||||
// .whereNotNull('related_document_id');
|
||||
|
||||
// for (const ref of previousRefs) {
|
||||
// if (!ref.related_document_id) continue;
|
||||
|
||||
// const previousDataset = await Dataset.query()
|
||||
// .where('id', ref.related_document_id)
|
||||
// .preload('identifier')
|
||||
// .preload('titles')
|
||||
// .first();
|
||||
|
||||
// if (previousDataset) {
|
||||
// const versionInfo = {
|
||||
// id: previousDataset.id,
|
||||
// publish_id: previousDataset.publish_id,
|
||||
// doi: previousDataset.identifier?.value || null,
|
||||
// main_title: previousDataset.mainTitle || null,
|
||||
// server_date_published: previousDataset.server_date_published,
|
||||
// relation: 'IsPreviousVersionOf', // From perspective of current dataset
|
||||
// };
|
||||
|
||||
// previousVersions.push(versionInfo);
|
||||
|
||||
// // Recursively get even older versions
|
||||
// const olderVersions = await this.getPreviousVersions(previousDataset.id, visited);
|
||||
// previousVersions.push(...olderVersions);
|
||||
// }
|
||||
// }
|
||||
|
||||
// return previousVersions;
|
||||
// }
|
||||
|
||||
/**
 * Recursively collect all previous (older) versions of a dataset.
 *
 * A dataset points to its older version through a reference whose relation is
 * 'IsNewVersionOf'. Every such reference is resolved through
 * resolveReferencedDataset() instead of being filtered on related_document_id,
 * so references without a stored related_document_id are still followed.
 *
 * @param datasetId - id of the dataset whose predecessors are collected
 * @param visited - ids already walked during this traversal (cycle guard)
 * @returns version descriptors in depth-first order (each resolved dataset is
 *          followed by its own predecessors), tagged 'IsPreviousVersionOf'
 */
private async getPreviousVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
    // Prevent infinite loops when the reference data contains a cycle
    if (visited.has(datasetId)) return [];
    visited.add(datasetId);

    const result: any[] = [];

    // A dataset points to its OLDER version via relation 'IsNewVersionOf'
    const refs = await DatasetReference.query()
        .where('document_id', datasetId)
        .where('relation', 'IsNewVersionOf');

    for (const ref of refs) {
        const related = await this.resolveReferencedDataset(ref, datasetId);

        // Skip unresolvable references, and datasets already walked: without the
        // second check a cycle or a duplicated reference would list the same
        // dataset (possibly the starting one) more than once.
        if (!related || visited.has(related.id)) continue;

        result.push({
            id: related.id,
            publish_id: related.publish_id,
            doi: related.identifier?.value || null,
            main_title: related.mainTitle || null,
            server_date_published: related.server_date_published,
            relation: 'IsPreviousVersionOf', // from the perspective of the current dataset
        });

        // Continue down the chain towards even older versions
        result.push(...(await this.getPreviousVersions(related.id, visited)));
    }

    return result;
}
|
||||
|
||||
/**
|
||||
* Recursively get all newer versions
|
||||
*/
|
||||
// private async getNewerVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
|
||||
// // Prevent infinite loops
|
||||
// if (visited.has(datasetId)) {
|
||||
// return [];
|
||||
// }
|
||||
// visited.add(datasetId);
|
||||
|
||||
// const newerVersions: any[] = [];
|
||||
|
||||
// // Find references where this dataset "IsPreviousVersionOf" another dataset
|
||||
// const newerRefs = await DatasetReference.query()
|
||||
// .where('document_id', datasetId)
|
||||
// .where('relation', 'IsPreviousVersionOf')
|
||||
// .whereNotNull('related_document_id');
|
||||
|
||||
// for (const ref of newerRefs) {
|
||||
// if (!ref.related_document_id) continue;
|
||||
|
||||
// const newerDataset = await Dataset.query().where('id', ref.related_document_id).preload('identifier').preload('titles').first();
|
||||
|
||||
// if (newerDataset) {
|
||||
// const versionInfo = {
|
||||
// id: newerDataset.id,
|
||||
// publish_id: newerDataset.publish_id,
|
||||
// doi: newerDataset.identifier?.value || null,
|
||||
// main_title: newerDataset.mainTitle || null,
|
||||
// server_date_published: newerDataset.server_date_published,
|
||||
// relation: 'IsNewVersionOf', // From perspective of current dataset
|
||||
// };
|
||||
|
||||
// newerVersions.push(versionInfo);
|
||||
|
||||
// // Recursively get even newer versions
|
||||
// const evenNewerVersions = await this.getNewerVersions(newerDataset.id, visited);
|
||||
// newerVersions.push(...evenNewerVersions);
|
||||
// }
|
||||
// }
|
||||
|
||||
// return newerVersions;
|
||||
// }
|
||||
/**
 * Recursively collect all newer versions of a dataset.
 *
 * A dataset points to its newer version through a reference whose relation is
 * 'IsPreviousVersionOf'. Every such reference is resolved through
 * resolveReferencedDataset() instead of being filtered on related_document_id,
 * so references without a stored related_document_id are still followed.
 *
 * @param datasetId - id of the dataset whose successors are collected
 * @param visited - ids already walked during this traversal (cycle guard)
 * @returns version descriptors in depth-first order (each resolved dataset is
 *          followed by its own successors), tagged 'IsNewVersionOf'
 */
private async getNewerVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
    // Prevent infinite loops when the reference data contains a cycle
    if (visited.has(datasetId)) return [];
    visited.add(datasetId);

    const result: any[] = [];

    // A dataset points to its NEWER version via relation 'IsPreviousVersionOf'
    const refs = await DatasetReference.query()
        .where('document_id', datasetId)
        .where('relation', 'IsPreviousVersionOf');

    for (const ref of refs) {
        const related = await this.resolveReferencedDataset(ref, datasetId);

        // Skip unresolvable references, and datasets already walked: without the
        // second check a cycle or a duplicated reference would list the same
        // dataset (possibly the starting one) more than once.
        if (!related || visited.has(related.id)) continue;

        result.push({
            id: related.id,
            publish_id: related.publish_id,
            doi: related.identifier?.value || null,
            main_title: related.mainTitle || null,
            server_date_published: related.server_date_published,
            relation: 'IsNewVersionOf', // from the perspective of the current dataset
        });

        // Continue up the chain towards even newer versions
        result.push(...(await this.getNewerVersions(related.id, visited)));
    }

    return result;
}
|
||||
|
||||
/**
 * Resolve the dataset that a version reference points to.
 *
 * Resolution order:
 *  1. By DOI: ref.value is normalized with normalizeDoi() and matched against
 *     the value of the dataset's identifier relation.
 *  2. By foreign key: ref.related_document_id is used as a fallback.
 *
 * In both cases a match that is the referencing dataset itself is rejected, so
 * a self-referential DOI or foreign key never makes a dataset its own version.
 *
 * @param ref - the reference to resolve
 * @param currentDatasetId - id of the dataset that owns the reference
 * @returns the related dataset with identifier and titles preloaded, or null
 *          when nothing other than the current dataset can be resolved
 */
private async resolveReferencedDataset(ref: DatasetReference, currentDatasetId: number): Promise<Dataset | null> {
    const doi = this.normalizeDoi(ref.value);

    if (doi) {
        const byDoi = await Dataset.query()
            .whereHas('identifier', (q) => q.where('value', doi))
            .preload('identifier')
            .preload('titles') // needed so mainTitle computes
            .first();

        // Only accept the DOI match when it is a different dataset; otherwise
        // fall through and give the stored foreign key a chance.
        if (byDoi && byDoi.id !== currentDatasetId) return byDoi;
    }

    if (ref.related_document_id && ref.related_document_id !== currentDatasetId) {
        return await Dataset.query()
            .where('id', ref.related_document_id)
            .preload('identifier')
            .preload('titles')
            .first();
    }

    return null;
}
|
||||
/**
 * Reduce a DOI reference value to the bare DOI name.
 *
 * Strips surrounding whitespace, a leading http(s)://doi.org/ or
 * http(s)://dx.doi.org/ resolver prefix, and a leading "doi:" label
 * (case-insensitive, optionally followed by whitespace).
 *
 * @param value - raw reference value, e.g. "https://doi.org/10.1234/abc"
 * @returns the bare DOI (e.g. "10.1234/abc"), or null when the input is empty
 *          or nothing is left after stripping
 */
private normalizeDoi(value: string | null): string | null {
    if (!value) return null;

    const doi = value
        .trim()
        .replace(/^https?:\/\/(dx\.)?doi\.org\//i, '')
        .replace(/^doi:\s*/i, '')
        .trim();

    // An input consisting only of whitespace or a prefix carries no DOI
    return doi.length > 0 ? doi : null;
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue