Add `fix:version-related-ids` ace command to backfill and correct related_document_id on IsNewVersionOf / IsPreviousVersionOf references, resolving the target dataset via its DOI. Handles both NULL and self-referential (wrong) values that the existing detect command could not repair. Make the dataset version-chain API DOI-based: resolve previous/newer versions through the DOI in the reference value instead of the unreliable related_document_id, so the chain is correct regardless of the stored FK.
413 lines
16 KiB
TypeScript
413 lines
16 KiB
TypeScript
import type { HttpContext } from '@adonisjs/core/http';
|
|
import Dataset from '#models/dataset';
|
|
import { StatusCodes } from 'http-status-codes';
|
|
import DatasetReference from '#models/dataset_reference';
|
|
|
|
// node ace make:controller Author
|
|
export default class DatasetController {
    /**
     * Shape accepted by `findByIdentifier`: `10.<registrant>/<suffix>.<n>[.<n>...]`,
     * e.g. `10.24341/tethys.99.2`.
     */
    private static readonly DOI_PATTERN = /^10\.\d+\/[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/;

    /**
     * GET /api/datasets
     * List all datasets whose server_state is 'published' or 'deleted',
     * newest publication date first, with titles and identifier preloaded.
     */
    public async index({ response }: HttpContext) {
        try {
            const datasets = await Dataset.query()
                // Grouped so the OR cannot leak into conditions added later.
                .where((query) => {
                    query.where('server_state', 'published').orWhere('server_state', 'deleted');
                })
                .preload('titles')
                .preload('identifier')
                .orderBy('server_date_published', 'desc');

            return response.status(StatusCodes.OK).json(datasets);
        } catch (error) {
            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: this.errorMessage(error, 'Some error occurred while retrieving datasets.'),
            });
        }
    }

    /**
     * GET /api/dataset
     * List all datasets whose server_state is 'published' or 'deleted',
     * ordered by server_date_published (query builder default direction),
     * with descriptions preloaded.
     */
    public async findAll({ response }: HttpContext) {
        try {
            const datasets = await Dataset.query()
                .where('server_state', 'published')
                .orWhere('server_state', 'deleted')
                .preload('descriptions')
                .orderBy('server_date_published');

            return response.status(StatusCodes.OK).json(datasets);
        } catch (error) {
            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: this.errorMessage(error, 'Some error occurred while retrieving datasets.'),
            });
        }
    }

    /**
     * GET /api/dataset/:publish_id
     * Find one dataset by publish_id and return it together with its version chain.
     * Responds 404 when no dataset matches and 500 when the lookup throws.
     */
    public async findOne({ response, params }: HttpContext) {
        try {
            // first() rather than firstOrFail() so "not found" becomes a 404 below.
            const dataset = await this.detailQuery(false).where('publish_id', params.publish_id).first();

            if (!dataset) {
                return response.status(StatusCodes.NOT_FOUND).json({
                    message: `Cannot find Dataset with publish_id=${params.publish_id}.`,
                });
            }

            const versionChain = await this.buildVersionChain(dataset);

            return response.status(StatusCodes.OK).json({
                ...dataset.toJSON(),
                versionChain: versionChain,
            });
        } catch (error) {
            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: this.errorMessage(error, `Error retrieving Dataset with publish_id=${params.publish_id}.`),
            });
        }
    }

    /**
     * GET /:prefix/:value
     * Find dataset by identifier (e.g., https://doi.tethys.at/10.24341/tethys.99.2)
     * and return it together with its version chain.
     * Responds 400 when `<prefix>/<value>` does not match DOI_PATTERN, 404 when
     * no dataset carries that identifier and 500 when the lookup throws.
     */
    public async findByIdentifier({ response, params }: HttpContext) {
        const identifierValue = `${params.prefix}/${params.value}`;

        if (!DatasetController.DOI_PATTERN.test(identifierValue)) {
            return response.status(StatusCodes.BAD_REQUEST).json({
                message: `Invalid DOI format: ${identifierValue}`,
            });
        }

        try {
            const dataset = await this.detailQuery(true)
                .whereHas('identifier', (builder) => {
                    builder.where('value', identifierValue);
                })
                .first();

            if (!dataset) {
                return response.status(StatusCodes.NOT_FOUND).json({
                    message: `Cannot find Dataset with identifier=${identifierValue}.`,
                });
            }

            const versionChain = await this.buildVersionChain(dataset);

            return response.status(StatusCodes.OK).json({
                ...dataset.toJSON(),
                versionChain: versionChain,
            });
        } catch (error) {
            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
                message: this.errorMessage(error, `Error retrieving Dataset with identifier=${identifierValue}.`),
            });
        }
    }

    /**
     * Start a dataset query that preloads every relation the detail endpoints
     * return. Callers add their own lookup constraint and execute the query.
     *
     * @param restrictPivotRole - when true, the `authors` / `contributors`
     *   preloads additionally filter the pivot `role` column to 'author' /
     *   'contributor'; when false no pivot filter is added.
     *   NOTE(review): historically only `findByIdentifier` applied this filter
     *   while `findOne` did not; the flag preserves both behaviours — confirm
     *   whether the two endpoints are meant to differ.
     */
    private detailQuery(restrictPivotRole: boolean) {
        const personColumns = ['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'];

        return Dataset.query()
            .preload('titles')
            .preload('descriptions')
            .preload('user', (builder) => {
                builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
            })
            .preload('authors', (builder) => {
                builder
                    .select(personColumns)
                    .withCount('datasets', (query) => {
                        query.as('datasets_count');
                    })
                    .pivotColumns(['role', 'sort_order']);
                if (restrictPivotRole) {
                    builder.wherePivot('role', 'author');
                }
                builder.orderBy('pivot_sort_order', 'asc');
            })
            .preload('contributors', (builder) => {
                builder
                    .select(personColumns)
                    .withCount('datasets', (query) => {
                        query.as('datasets_count');
                    })
                    .pivotColumns(['role', 'sort_order', 'contributor_type']);
                if (restrictPivotRole) {
                    builder.wherePivot('role', 'contributor');
                }
                builder.orderBy('pivot_sort_order', 'asc');
            })
            .preload('subjects')
            .preload('coverage')
            .preload('licenses')
            .preload('references')
            .preload('project')
            .preload('files', (builder) => {
                builder.preload('hashvalues');
            })
            .preload('identifier');
    }

    /**
     * Pick a message for an error response.
     *
     * @param error - whatever was thrown; may be any value, including null/undefined.
     * @param fallback - text used when `error` carries no non-empty string `message`.
     * @returns the thrown value's `message` when usable, otherwise `fallback`.
     */
    private errorMessage(error: unknown, fallback: string): string {
        const message = (error as { message?: unknown } | null | undefined)?.message;
        return typeof message === 'string' && message !== '' ? message : fallback;
    }

    /**
     * Build the complete version chain for a dataset.
     * Traverses both backwards (previous versions) and forwards (newer versions).
     *
     * @returns `{ previousVersions, newerVersions }`, each a flat list of
     *   version summaries in traversal order (nearest version first).
     */
    private async buildVersionChain(dataset: Dataset) {
        const previousVersions = await this.getPreviousVersions(dataset.id);
        const newerVersions = await this.getNewerVersions(dataset.id);

        return { previousVersions, newerVersions };
    }

    /**
     * Recursively get all previous versions.
     * A dataset points to its OLDER version via relation 'IsNewVersionOf';
     * each returned entry is labelled 'IsPreviousVersionOf'.
     *
     * @param datasetId - id of the dataset whose ancestors are collected.
     * @param visited - ids already handled; guards against reference cycles.
     */
    private async getPreviousVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
        return this.collectVersions(datasetId, 'IsNewVersionOf', visited);
    }

    /**
     * Recursively get all newer versions.
     * A dataset points to its NEWER version via relation 'IsPreviousVersionOf';
     * each returned entry is labelled 'IsNewVersionOf'.
     *
     * @param datasetId - id of the dataset whose successors are collected.
     * @param visited - ids already handled; guards against reference cycles.
     */
    private async getNewerVersions(datasetId: number, visited: Set<number> = new Set()): Promise<any[]> {
        return this.collectVersions(datasetId, 'IsPreviousVersionOf', visited);
    }

    /**
     * Depth-first walk along one direction of the version chain.
     *
     * @param datasetId - dataset whose outgoing version references are followed.
     * @param storedRelation - relation value to follow on this dataset's references.
     * @param visited - ids already reached during this walk (mutated).
     * @returns one summary per newly reached dataset; `relation` on each entry
     *   is the inverse of `storedRelation`.
     */
    private async collectVersions(
        datasetId: number,
        storedRelation: 'IsNewVersionOf' | 'IsPreviousVersionOf',
        visited: Set<number>,
    ): Promise<any[]> {
        if (visited.has(datasetId)) return [];
        visited.add(datasetId);

        const reportedRelation = storedRelation === 'IsNewVersionOf' ? 'IsPreviousVersionOf' : 'IsNewVersionOf';

        // related_document_id is deliberately not required here: the target is
        // resolved through the DOI in the reference value first.
        const refs = await DatasetReference.query().where('document_id', datasetId).where('relation', storedRelation);

        const result: any[] = [];
        for (const ref of refs) {
            const related = await this.resolveReferencedDataset(ref, datasetId);

            // Skip unresolved targets and anything already in the chain. Without
            // the visited check a cycle, a self-reference or two references to
            // the same target would list a dataset twice or list the starting
            // dataset as its own version.
            if (!related || visited.has(related.id)) continue;

            result.push({
                id: related.id,
                publish_id: related.publish_id,
                doi: related.identifier?.value || null,
                main_title: related.mainTitle || null,
                server_date_published: related.server_date_published,
                relation: reportedRelation,
            });

            result.push(...(await this.collectVersions(related.id, storedRelation, visited)));
        }

        return result;
    }

    /**
     * Resolve the dataset a version reference points to.
     *
     * The DOI in `ref.value` is tried first; `ref.related_document_id` is only a
     * fallback. Neither path returns the dataset identified by
     * `currentDatasetId`, so a self-referential reference never resolves to itself.
     *
     * @param ref - the reference row to resolve.
     * @param currentDatasetId - id of the dataset that owns `ref`.
     * @returns the target with `identifier` and `titles` preloaded, or null.
     */
    private async resolveReferencedDataset(ref: DatasetReference, currentDatasetId: number) {
        const doi = this.normalizeDoi(ref.value);

        if (doi) {
            const byDoi = await Dataset.query()
                .whereHas('identifier', (q) => q.where('value', doi))
                .preload('identifier')
                .preload('titles') // presumably required for mainTitle to be populated
                .first();

            // A DOI that resolves to the owning dataset is treated as unresolved
            // and falls through to the stored id.
            if (byDoi && byDoi.id !== currentDatasetId) return byDoi;
        }

        if (ref.related_document_id && ref.related_document_id !== currentDatasetId) {
            return await Dataset.query()
                .where('id', ref.related_document_id)
                .preload('identifier')
                .preload('titles')
                .first();
        }

        return null;
    }

    /**
     * Reduce a stored reference value to a bare DOI.
     *
     * Strips surrounding whitespace, a leading `http(s)://doi.org/` or
     * `http(s)://dx.doi.org/`, any other `http(s)://<host>/` that is directly
     * followed by a `10.<digits>` DOI (e.g. `https://doi.tethys.at/10.24341/...`),
     * and a leading `doi:` label with optional whitespace after it.
     *
     * @returns the bare value, or null when the input is null or blank.
     */
    private normalizeDoi(value: string | null): string | null {
        if (!value) return null;

        const bare = value
            .trim()
            .replace(/^https?:\/\/(dx\.)?doi\.org\//i, '')
            .replace(/^https?:\/\/[^/\s]+\/(?=10\.\d)/i, '')
            .replace(/^doi:\s*/i, '')
            .trim();

        return bare === '' ? null : bare;
    }
}
|