- feat: Enhance README with setup instructions, usage, and command documentation
- fix: Update API routes to include DOI URL handling and improve route organization
- chore: Add ORCID preload rule file and ensure proper registration
- docs: Add MIT License to the project for open-source compliance
- feat: Implement command to detect and fix missing dataset cross-references
- feat: Create command for updating DataCite DOI records with detailed logging and error handling
- docs: Add comprehensive documentation for dataset indexing command
- docs: Create detailed documentation for DataCite update command with usage examples and error handling
parent 8f67839f93
commit c049b22723
11 changed files with 2187 additions and 555 deletions

LICENSE (new file, 22 lines)

@@ -0,0 +1,22 @@

MIT License

Copyright (c) 2025 Tethys Research Repository

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -1,23 +1,35 @@
|
|||
import type { HttpContext } from '@adonisjs/core/http';
|
||||
// import Person from 'App/Models/Person';
|
||||
import Dataset from '#models/dataset';
|
||||
import { StatusCodes } from 'http-status-codes';
|
||||
|
||||
// node ace make:controller Author
|
||||
export default class DatasetController {
|
||||
public async index({}: HttpContext) {
|
||||
// Select datasets with server_state 'published' or 'deleted' and sort by the last published date
|
||||
const datasets = await Dataset.query()
|
||||
.where(function (query) {
|
||||
query.where('server_state', 'published').orWhere('server_state', 'deleted');
|
||||
})
|
||||
.preload('titles')
|
||||
.preload('identifier')
|
||||
.orderBy('server_date_published', 'desc');
|
||||
/**
|
||||
* GET /api/datasets
|
||||
* Find all published datasets
|
||||
*/
|
||||
public async index({ response }: HttpContext) {
|
||||
try {
|
||||
const datasets = await Dataset.query()
|
||||
.where(function (query) {
|
||||
query.where('server_state', 'published').orWhere('server_state', 'deleted');
|
||||
})
|
||||
.preload('titles')
|
||||
.preload('identifier')
|
||||
.orderBy('server_date_published', 'desc');
|
||||
|
||||
return datasets;
|
||||
return response.status(StatusCodes.OK).json(datasets);
|
||||
} catch (error) {
|
||||
return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
|
||||
message: error.message || 'Some error occurred while retrieving datasets.',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* GET /api/dataset
|
||||
* Find all published datasets
|
||||
*/
|
||||
public async findAll({ response }: HttpContext) {
|
||||
try {
|
||||
const datasets = await Dataset.query()
|
||||
|
|
@ -33,48 +45,142 @@ export default class DatasetController {
|
|||
}
|
||||
}
|
||||
|
||||
public async findOne({ params }: HttpContext) {
|
||||
const datasets = await Dataset.query()
|
||||
.where('publish_id', params.publish_id)
|
||||
.preload('titles')
|
||||
.preload('descriptions')
|
||||
.preload('user', (builder) => {
|
||||
builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
|
||||
})
|
||||
.preload('authors', (builder) => {
|
||||
builder
|
||||
.select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
|
||||
.withCount('datasets', (query) => {
|
||||
query.as('datasets_count');
|
||||
})
|
||||
.pivotColumns(['role', 'sort_order'])
|
||||
.orderBy('pivot_sort_order', 'asc');
|
||||
})
|
||||
.preload('contributors', (builder) => {
|
||||
builder
|
||||
.select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
|
||||
.withCount('datasets', (query) => {
|
||||
query.as('datasets_count');
|
||||
})
|
||||
.pivotColumns(['role', 'sort_order', 'contributor_type'])
|
||||
.orderBy('pivot_sort_order', 'asc');
|
||||
})
|
||||
.preload('subjects')
|
||||
.preload('coverage')
|
||||
.preload('licenses')
|
||||
.preload('references')
|
||||
.preload('project')
|
||||
.preload('referenced_by', (builder) => {
|
||||
builder.preload('dataset', (builder) => {
|
||||
builder.preload('identifier');
|
||||
});
|
||||
})
|
||||
.preload('files', (builder) => {
|
||||
builder.preload('hashvalues');
|
||||
})
|
||||
.preload('identifier')
|
||||
.firstOrFail();
|
||||
/**
|
||||
* GET /api/dataset/:publish_id
|
||||
* Find one dataset by publish_id
|
||||
*/
|
||||
public async findOne({ response, params }: HttpContext) {
|
||||
try {
|
||||
const dataset = await Dataset.query()
|
||||
.where('publish_id', params.publish_id)
|
||||
.preload('titles')
|
||||
.preload('descriptions') // Using 'descriptions' instead of 'abstracts'
|
||||
.preload('user', (builder) => {
|
||||
builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
|
||||
})
|
||||
.preload('authors', (builder) => {
|
||||
builder
|
||||
.select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
|
||||
.withCount('datasets', (query) => {
|
||||
query.as('datasets_count');
|
||||
})
|
||||
.pivotColumns(['role', 'sort_order'])
|
||||
.orderBy('pivot_sort_order', 'asc');
|
||||
})
|
||||
.preload('contributors', (builder) => {
|
||||
builder
|
||||
.select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
|
||||
.withCount('datasets', (query) => {
|
||||
query.as('datasets_count');
|
||||
})
|
||||
.pivotColumns(['role', 'sort_order', 'contributor_type'])
|
||||
.orderBy('pivot_sort_order', 'asc');
|
||||
})
|
||||
.preload('subjects')
|
||||
.preload('coverage')
|
||||
.preload('licenses')
|
||||
.preload('references')
|
||||
.preload('project')
|
||||
.preload('referenced_by', (builder) => {
|
||||
builder.preload('dataset', (builder) => {
|
||||
builder.preload('identifier');
|
||||
});
|
||||
})
|
||||
.preload('files', (builder) => {
|
||||
builder.preload('hashvalues');
|
||||
})
|
||||
.preload('identifier')
|
||||
.first(); // Use first() instead of firstOrFail() to handle not found gracefully
|
||||
|
||||
return datasets;
|
||||
if (!dataset) {
|
||||
return response.status(StatusCodes.NOT_FOUND).json({
|
||||
message: `Cannot find Dataset with publish_id=${params.publish_id}.`,
|
||||
});
|
||||
}
|
||||
|
||||
return response.status(StatusCodes.OK).json(dataset);
|
||||
} catch (error) {
|
||||
return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
|
||||
message: error.message || `Error retrieving Dataset with publish_id=${params.publish_id}.`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* GET /:prefix/:value
|
||||
* Find dataset by identifier (e.g., https://doi.tethys.at/10.24341/tethys.99.2)
|
||||
*/
|
||||
public async findByIdentifier({ response, params }: HttpContext) {
|
||||
const identifierValue = `${params.prefix}/${params.value}`;
|
||||
|
||||
// Optional: Validate DOI format
|
||||
if (!identifierValue.match(/^10\.\d+\/[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/)) {
|
||||
return response.status(StatusCodes.BAD_REQUEST).json({
|
||||
message: `Invalid DOI format: ${identifierValue}`,
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
// Method 1: Using subquery with whereIn (most similar to your original)
|
||||
const dataset = await Dataset.query()
|
||||
// .whereIn('id', (subQuery) => {
|
||||
// subQuery.select('dataset_id').from('dataset_identifiers').where('value', identifierValue);
|
||||
// })
|
||||
.whereHas('identifier', (builder) => {
|
||||
builder.where('value', identifierValue);
|
||||
})
|
||||
.preload('titles')
|
||||
.preload('descriptions') // Using 'descriptions' instead of 'abstracts'
|
||||
.preload('user', (builder) => {
|
||||
builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
|
||||
})
|
||||
.preload('authors', (builder) => {
|
||||
builder
|
||||
.select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
|
||||
.withCount('datasets', (query) => {
|
||||
query.as('datasets_count');
|
||||
})
|
||||
.pivotColumns(['role', 'sort_order'])
|
||||
.wherePivot('role', 'author')
|
||||
.orderBy('pivot_sort_order', 'asc');
|
||||
})
|
||||
.preload('contributors', (builder) => {
|
||||
builder
|
||||
.select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
|
||||
.withCount('datasets', (query) => {
|
||||
query.as('datasets_count');
|
||||
})
|
||||
.pivotColumns(['role', 'sort_order', 'contributor_type'])
|
||||
.wherePivot('role', 'contributor')
|
||||
.orderBy('pivot_sort_order', 'asc');
|
||||
})
|
||||
.preload('subjects')
|
||||
.preload('coverage')
|
||||
.preload('licenses')
|
||||
.preload('references')
|
||||
.preload('project')
|
||||
.preload('referenced_by', (builder) => {
|
||||
builder.preload('dataset', (builder) => {
|
||||
builder.preload('identifier');
|
||||
});
|
||||
})
|
||||
.preload('files', (builder) => {
|
||||
builder.preload('hashvalues');
|
||||
})
|
||||
.preload('identifier')
|
||||
.first();
|
||||
|
||||
if (!dataset) {
|
||||
return response.status(StatusCodes.NOT_FOUND).json({
|
||||
message: `Cannot find Dataset with identifier=${identifierValue}.`,
|
||||
});
|
||||
}
|
||||
|
||||
return response.status(StatusCodes.OK).json(dataset);
|
||||
} catch (error) {
|
||||
return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
|
||||
message: error.message || `Error retrieving Dataset with identifier=${identifierValue}.`,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,3 @@
|
|||
// import { Client } from 'guzzle';
|
||||
// import { Log } from '@adonisjs/core/build/standalone';
|
||||
// import { DoiInterface } from './interfaces/DoiInterface';
|
||||
import DoiClientContract from '#app/Library/Doi/DoiClientContract';
|
||||
import DoiClientException from '#app/exceptions/DoiClientException';
|
||||
import { StatusCodes } from 'http-status-codes';
|
||||
|
|
@ -12,14 +9,14 @@ export class DoiClient implements DoiClientContract {
|
|||
public username: string;
|
||||
public password: string;
|
||||
public serviceUrl: string;
|
||||
public apiUrl: string;
|
||||
|
||||
constructor() {
|
||||
// const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
|
||||
this.username = process.env.DATACITE_USERNAME || '';
|
||||
this.password = process.env.DATACITE_PASSWORD || '';
|
||||
this.serviceUrl = process.env.DATACITE_SERVICE_URL || '';
|
||||
// this.prefix = process.env.DATACITE_PREFIX || '';
|
||||
// this.base_domain = process.env.BASE_DOMAIN || '';
|
||||
this.apiUrl = process.env.DATACITE_API_URL || 'https://api.datacite.org';
|
||||
|
||||
if (this.username === '' || this.password === '' || this.serviceUrl === '') {
|
||||
const message = 'Missing configuration settings to properly initialize DOI client';
|
||||
|
|
@ -90,4 +87,240 @@ export class DoiClient implements DoiClientContract {
|
|||
throw new DoiClientException(error.response.status, error.response.data);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves DOI information from DataCite REST API
|
||||
*
|
||||
* @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
|
||||
* @returns Promise with DOI information or null if not found
|
||||
*/
|
||||
public async getDoiInfo(doiValue: string): Promise<any | null> {
|
||||
try {
|
||||
// Use configurable DataCite REST API URL
|
||||
const dataciteApiUrl = `${this.apiUrl}/dois/${doiValue}`;
|
||||
const response = await axios.get(dataciteApiUrl, {
|
||||
headers: {
|
||||
Accept: 'application/vnd.api+json',
|
||||
},
|
||||
});
|
||||
|
||||
if (response.status === 200 && response.data.data) {
|
||||
return {
|
||||
created: response.data.data.attributes.created,
|
||||
registered: response.data.data.attributes.registered,
|
||||
updated: response.data.data.attributes.updated,
|
||||
published: response.data.data.attributes.published,
|
||||
state: response.data.data.attributes.state,
|
||||
url: response.data.data.attributes.url,
|
||||
metadata: response.data.data.attributes,
|
||||
};
|
||||
}
|
||||
} catch (error) {
|
||||
if (error.response?.status === 404) {
|
||||
logger.debug(`DOI ${doiValue} not found in DataCite`);
|
||||
return null;
|
||||
}
|
||||
|
||||
logger.debug(`DataCite REST API failed for ${doiValue}: ${error.message}`);
|
||||
|
||||
// Fallback to MDS API
|
||||
return await this.getDoiInfoFromMds(doiValue);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fallback method to get DOI info from MDS API
|
||||
*
|
||||
* @param doiValue The DOI identifier
|
||||
* @returns Promise with basic DOI information or null
|
||||
*/
|
||||
private async getDoiInfoFromMds(doiValue: string): Promise<any | null> {
|
||||
try {
|
||||
const auth = {
|
||||
username: this.username,
|
||||
password: this.password,
|
||||
};
|
||||
|
||||
// Get DOI URL
|
||||
const doiResponse = await axios.get(`${this.serviceUrl}/doi/${doiValue}`, { auth });
|
||||
|
||||
if (doiResponse.status === 200) {
|
||||
// Get metadata if available
|
||||
try {
|
||||
const metadataResponse = await axios.get(`${this.serviceUrl}/metadata/${doiValue}`, {
|
||||
auth,
|
||||
headers: {
|
||||
Accept: 'application/xml',
|
||||
},
|
||||
});
|
||||
|
||||
return {
|
||||
url: doiResponse.data.trim(),
|
||||
metadata: metadataResponse.data,
|
||||
created: new Date().toISOString(), // MDS doesn't provide creation dates
|
||||
registered: new Date().toISOString(), // Use current time as fallback
|
||||
source: 'mds',
|
||||
};
|
||||
} catch (metadataError) {
|
||||
// Return basic info even if metadata fetch fails
|
||||
return {
|
||||
url: doiResponse.data.trim(),
|
||||
created: new Date().toISOString(),
|
||||
registered: new Date().toISOString(),
|
||||
source: 'mds',
|
||||
};
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (error.response?.status === 404) {
|
||||
logger.debug(`DOI ${doiValue} not found in DataCite MDS`);
|
||||
return null;
|
||||
}
|
||||
|
||||
logger.debug(`DataCite MDS API failed for ${doiValue}: ${error.message}`);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a DOI exists in DataCite
|
||||
*
|
||||
* @param doiValue The DOI identifier
|
||||
* @returns Promise<boolean> True if DOI exists
|
||||
*/
|
||||
public async doiExists(doiValue: string): Promise<boolean> {
|
||||
const doiInfo = await this.getDoiInfo(doiValue);
|
||||
return doiInfo !== null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the last modification date of a DOI
|
||||
*
|
||||
* @param doiValue The DOI identifier
|
||||
* @returns Promise<Date | null> Last modification date or creation date if never updated, null if not found
|
||||
*/
|
||||
public async getDoiLastModified(doiValue: string): Promise<Date | null> {
|
||||
const doiInfo = await this.getDoiInfo(doiValue);
|
||||
|
||||
if (doiInfo) {
|
||||
// Use updated date if available, otherwise fall back to created/registered date
|
||||
const dateToUse = doiInfo.updated || doiInfo.registered || doiInfo.created;
|
||||
|
||||
if (dateToUse) {
|
||||
logger.debug(
|
||||
`DOI ${doiValue}: Using ${doiInfo.updated ? 'updated' : doiInfo.registered ? 'registered' : 'created'} date: ${dateToUse}`,
|
||||
);
|
||||
return new Date(dateToUse);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a DOI unfindable (registered but not discoverable)
|
||||
* Note: DOIs cannot be deleted, only made unfindable
|
||||
* await doiClient.makeDoiUnfindable('10.21388/tethys.231');
|
||||
*
|
||||
* @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
|
||||
* @returns Promise<AxiosResponse<any>> The http response
|
||||
*/
|
||||
public async makeDoiUnfindable(doiValue: string): Promise<AxiosResponse<any>> {
|
||||
const auth = {
|
||||
username: this.username,
|
||||
password: this.password,
|
||||
};
|
||||
|
||||
try {
|
||||
// First, check if DOI exists
|
||||
const exists = await this.doiExists(doiValue);
|
||||
if (!exists) {
|
||||
throw new DoiClientException(404, `DOI ${doiValue} not found`);
|
||||
}
|
||||
|
||||
// Delete the DOI URL mapping to make it unfindable
|
||||
// This removes the URL but keeps the metadata registered
|
||||
const response = await axios.delete(`${this.serviceUrl}/doi/${doiValue}`, { auth });
|
||||
|
||||
// Response Codes for DELETE /doi/{doi}
|
||||
// 200 OK: operation successful
|
||||
// 401 Unauthorized: no login
|
||||
// 403 Forbidden: login problem, quota exceeded
|
||||
// 404 Not Found: DOI does not exist
|
||||
if (response.status !== 200) {
|
||||
const message = `Unexpected DataCite MDS response code ${response.status}`;
|
||||
logger.error(message);
|
||||
throw new DoiClientException(response.status, message);
|
||||
}
|
||||
|
||||
logger.info(`DOI ${doiValue} successfully made unfindable`);
|
||||
return response;
|
||||
} catch (error) {
|
||||
logger.error(`Failed to make DOI ${doiValue} unfindable: ${error.message}`);
|
||||
if (error instanceof DoiClientException) {
|
||||
throw error;
|
||||
}
|
||||
throw new DoiClientException(error.response?.status || 500, error.response?.data || error.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a DOI findable again by re-registering the URL
|
||||
* await doiClient.makeDoiFindable(
|
||||
* '10.21388/tethys.231',
|
||||
* 'https://doi.dev.tethys.at/10.21388/tethys.231'
|
||||
* );
|
||||
*
|
||||
* @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
|
||||
* @param landingPageUrl The landing page URL
|
||||
* @returns Promise<AxiosResponse<any>> The http response
|
||||
*/
|
||||
public async makeDoiFindable(doiValue: string, landingPageUrl: string): Promise<AxiosResponse<any>> {
|
||||
const auth = {
|
||||
username: this.username,
|
||||
password: this.password,
|
||||
};
|
||||
|
||||
try {
|
||||
// Re-register the DOI with its URL to make it findable again
|
||||
const response = await axios.put(`${this.serviceUrl}/doi/${doiValue}`, `doi=${doiValue}\nurl=${landingPageUrl}`, { auth });
|
||||
|
||||
// Response Codes for PUT /doi/{doi}
|
||||
// 201 Created: operation successful
|
||||
// 400 Bad Request: request body must be exactly two lines: DOI and URL
|
||||
// 401 Unauthorized: no login
|
||||
// 403 Forbidden: login problem, quota exceeded
|
||||
// 412 Precondition failed: metadata must be uploaded first
|
||||
if (response.status !== 201) {
|
||||
const message = `Unexpected DataCite MDS response code ${response.status}`;
|
||||
logger.error(message);
|
||||
throw new DoiClientException(response.status, message);
|
||||
}
|
||||
|
||||
logger.info(`DOI ${doiValue} successfully made findable again`);
|
||||
return response;
|
||||
} catch (error) {
|
||||
logger.error(`Failed to make DOI ${doiValue} findable: ${error.message}`);
|
||||
if (error instanceof DoiClientException) {
|
||||
throw error;
|
||||
}
|
||||
throw new DoiClientException(error.response?.status || 500, error.response?.data || error.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the current state of a DOI (draft, registered, findable)
|
||||
* const state = await doiClient.getDoiState('10.21388/tethys.231');
|
||||
* console.log(`Current state: ${state}`); // 'findable'
|
||||
*
|
||||
* @param doiValue The DOI identifier
|
||||
* @returns Promise<string | null> The DOI state or null if not found
|
||||
*/
|
||||
public async getDoiState(doiValue: string): Promise<string | null> {
|
||||
const doiInfo = await this.getDoiInfo(doiValue);
|
||||
return doiInfo?.state || null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
commands/fix_dataset_cross_references.ts (new file, 317 lines)

@@ -0,0 +1,317 @@
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| node ace make:command fix-dataset-cross-references
|
||||
| DONE: create commands/fix_dataset_cross_references.ts
|
||||
|--------------------------------------------------------------------------
|
||||
*/
|
||||
import { BaseCommand, flags } from '@adonisjs/core/ace';
|
||||
import type { CommandOptions } from '@adonisjs/core/types/ace';
|
||||
import Dataset from '#models/dataset';
|
||||
import DatasetReference from '#models/dataset_reference';
|
||||
// import env from '#start/env';
|
||||
|
||||
interface MissingCrossReference {
|
||||
sourceDatasetId: number;
|
||||
targetDatasetId: number;
|
||||
sourcePublishId: number | null;
|
||||
targetPublishId: number | null;
|
||||
referenceType: string;
|
||||
relation: string;
|
||||
doi: string | null;
|
||||
reverseRelation: string;
|
||||
}
|
||||
|
||||
export default class DetectMissingCrossReferences extends BaseCommand {
|
||||
static commandName = 'detect:missing-cross-references';
|
||||
static description = 'Detect missing bidirectional cross-references between versioned datasets';
|
||||
|
||||
public static needsApplication = true;
|
||||
|
||||
@flags.boolean({ alias: 'f', description: 'Fix missing cross-references automatically' })
|
||||
public fix: boolean = false;
|
||||
|
||||
@flags.boolean({ alias: 'v', description: 'Verbose output' })
|
||||
public verbose: boolean = false;
|
||||
|
||||
public static options: CommandOptions = {
|
||||
startApp: true,
|
||||
staysAlive: false,
|
||||
};
|
||||
|
||||
async run() {
|
||||
this.logger.info('🔍 Detecting missing cross-references...');
|
||||
|
||||
try {
|
||||
const missingReferences = await this.findMissingCrossReferences();
|
||||
|
||||
if (missingReferences.length === 0) {
|
||||
this.logger.success('All cross-references are properly linked!');
|
||||
return;
|
||||
}
|
||||
|
||||
this.logger.warning(`Found ${missingReferences.length} missing cross-reference(s):`);
|
||||
|
||||
for (const missing of missingReferences) {
|
||||
this.logger.info(
|
||||
`Dataset ${missing.sourceDatasetId} references ${missing.targetDatasetId}, but reverse reference is missing`,
|
||||
);
|
||||
|
||||
if (this.verbose) {
|
||||
this.logger.info(` - Reference type: ${missing.referenceType}`);
|
||||
this.logger.info(` - Relation: ${missing.relation}`);
|
||||
this.logger.info(` - DOI: ${missing.doi}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (this.fix) {
|
||||
await this.fixMissingReferences(missingReferences);
|
||||
this.logger.success('All missing cross-references have been fixed!');
|
||||
} else {
|
||||
this.printMissingReferencesList(missingReferences);
|
||||
this.logger.info('💡 Run with --fix flag to automatically create missing cross-references');
|
||||
}
|
||||
} catch (error) {
|
||||
this.logger.error('Error detecting missing cross-references:', error);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
private async findMissingCrossReferences(): Promise<MissingCrossReference[]> {
|
||||
const missingReferences: {
|
||||
sourceDatasetId: number;
|
||||
targetDatasetId: number;
|
||||
sourcePublishId: number | null;
|
||||
targetPublishId: number | null;
|
||||
referenceType: string;
|
||||
relation: string;
|
||||
doi: string | null;
|
||||
reverseRelation: string;
|
||||
}[] = [];
|
||||
|
||||
this.logger.info('📊 Querying dataset references...');
|
||||
|
||||
// Find all references that point to Tethys datasets (DOI or URL containing tethys DOI)
|
||||
// Only from datasets that are published
|
||||
const tethysReferences = await DatasetReference.query()
|
||||
.whereIn('type', ['DOI', 'URL'])
|
||||
.where((query) => {
|
||||
query.where('value', 'like', '%doi.org/10.24341/tethys.%').orWhere('value', 'like', '%tethys.at/dataset/%');
|
||||
})
|
||||
.preload('dataset', (datasetQuery) => {
|
||||
datasetQuery.where('server_state', 'published');
|
||||
})
|
||||
.whereHas('dataset', (datasetQuery) => {
|
||||
datasetQuery.where('server_state', 'published');
|
||||
});
|
||||
|
||||
this.logger.info(`🔗 Found ${tethysReferences.length} Tethys references from published datasets`);
|
||||
|
||||
let processedCount = 0;
|
||||
for (const reference of tethysReferences) {
|
||||
processedCount++;
|
||||
|
||||
if (this.verbose && processedCount % 10 === 0) {
|
||||
this.logger.info(`📈 Processed ${processedCount}/${tethysReferences.length} references...`);
|
||||
}
|
||||
|
||||
// Extract dataset publish_id from DOI or URL
|
||||
const targetDatasetPublish = this.extractDatasetPublishIdFromReference(reference.value);
|
||||
|
||||
if (!targetDatasetPublish) {
|
||||
if (this.verbose) {
|
||||
this.logger.warning(`⚠️ Could not extract publish ID from: ${reference.value}`);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if target dataset exists and is published
|
||||
const targetDataset = await Dataset.query()
|
||||
.where('publish_id', targetDatasetPublish)
|
||||
.where('server_state', 'published')
|
||||
.first();
|
||||
|
||||
if (!targetDataset) {
|
||||
if (this.verbose) {
|
||||
this.logger.warning(`⚠️ Target dataset with publish_id ${targetDatasetPublish} not found or not published`);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Ensure we have a valid source dataset with proper preloading
|
||||
if (!reference.dataset) {
|
||||
this.logger.warning(`⚠️ Source dataset ${reference.document_id} not properly loaded, skipping...`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if reverse reference exists
|
||||
const reverseReferenceExists = await this.checkReverseReferenceExists(
|
||||
targetDataset.id,
|
||||
reference.document_id,
|
||||
reference.relation,
|
||||
);
|
||||
|
||||
if (!reverseReferenceExists) {
|
||||
missingReferences.push({
|
||||
sourceDatasetId: reference.document_id,
|
||||
targetDatasetId: targetDataset.id,
|
||||
sourcePublishId: reference.dataset.publish_id || null,
|
||||
targetPublishId: targetDataset.publish_id || null,
|
||||
referenceType: reference.type,
|
||||
relation: reference.relation,
|
||||
doi: reference.value,
|
||||
reverseRelation: this.getReverseRelation(reference.relation),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.info(`✅ Processed all ${processedCount} references`);
|
||||
return missingReferences;
|
||||
}
|
||||
|
||||
private extractDatasetPublishIdFromReference(value: string): number | null {
|
||||
// Extract from DOI: https://doi.org/10.24341/tethys.107 -> 107
|
||||
const doiMatch = value.match(/10\.24341\/tethys\.(\d+)/);
|
||||
if (doiMatch) {
|
||||
return parseInt(doiMatch[1]);
|
||||
}
|
||||
|
||||
// Extract from URL: https://tethys.at/dataset/107 -> 107
|
||||
const urlMatch = value.match(/tethys\.at\/dataset\/(\d+)/);
|
||||
if (urlMatch) {
|
||||
return parseInt(urlMatch[1]);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private async checkReverseReferenceExists(
|
||||
sourceDatasetId: number,
|
||||
targetDatasetId: number,
|
||||
originalRelation: string,
|
||||
): Promise<boolean> {
|
||||
const reverseRelation = this.getReverseRelation(originalRelation);
|
||||
|
||||
// Only check for reverse references where the source dataset is also published
|
||||
const reverseReference = await DatasetReference.query()
|
||||
.where('document_id', sourceDatasetId)
|
||||
.where('related_document_id', targetDatasetId)
|
||||
.where('relation', reverseRelation)
|
||||
.whereHas('dataset', (datasetQuery) => {
|
||||
datasetQuery.where('server_state', 'published');
|
||||
})
|
||||
.first();
|
||||
|
||||
return !!reverseReference;
|
||||
}
|
||||
|
||||
private getReverseRelation(relation: string): string {
|
||||
const relationMap: Record<string, string> = {
|
||||
IsNewVersionOf: 'IsPreviousVersionOf',
|
||||
IsPreviousVersionOf: 'IsNewVersionOf',
|
||||
|
||||
IsVersionOf: 'HasVersion',
|
||||
HasVersion: 'IsVersionOf',
|
||||
|
||||
Compiles: 'IsCompiledBy',
|
||||
IsCompiledBy: 'Compiles',
|
||||
|
||||
IsVariantFormOf: 'IsOriginalFormOf',
|
||||
IsOriginalFormOf: 'IsVariantFormOf',
|
||||
|
||||
IsPartOf: 'HasPart',
|
||||
HasPart: 'IsPartOf',
|
||||
|
||||
IsSupplementTo: 'IsSupplementedBy',
|
||||
IsSupplementedBy: 'IsSupplementTo',
|
||||
|
||||
Continues: 'IsContinuedBy',
|
||||
IsContinuedBy: 'Continues',
|
||||
};
|
||||
|
||||
// to catch relation types like 'compiles' or 'IsVariantFormOf' that are not in the map mark reverse as 'HasVersion'
|
||||
return relationMap[relation] || 'HasVersion'; // Default fallback
|
||||
}
|
||||
|
||||
private printMissingReferencesList(missingReferences: MissingCrossReference[]) {
|
||||
console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
|
||||
console.log('│ MISSING CROSS-REFERENCES REPORT │');
|
||||
console.log('│ (Published Datasets Only) │');
|
||||
console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
|
||||
console.log();
|
||||
|
||||
missingReferences.forEach((missing, index) => {
|
||||
console.log(
|
||||
`${index + 1}. Dataset ${missing.sourceDatasetId} (Publish ID: ${missing.sourcePublishId}) → Dataset ${missing.targetDatasetId} (Publish ID: ${missing.targetPublishId})`,
|
||||
);
|
||||
console.log(` ├─ Current relation: "${missing.relation}"`);
|
||||
console.log(` ├─ Missing reverse relation: "${missing.reverseRelation}"`);
|
||||
console.log(` ├─ Reference type: ${missing.referenceType}`);
|
||||
console.log(` └─ DOI/URL: ${missing.doi}`);
|
||||
console.log();
|
||||
});
|
||||
|
||||
console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
|
||||
console.log(`│ SUMMARY: ${missingReferences.length} missing reverse reference(s) detected │`);
|
||||
console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
|
||||
}
|
||||
|
||||
private async fixMissingReferences(missingReferences: MissingCrossReference[]) {
|
||||
this.logger.info('🔧 Creating missing cross-references in database...');
|
||||
|
||||
let fixedCount = 0;
|
||||
let errorCount = 0;
|
||||
|
||||
for (const [index, missing] of missingReferences.entries()) {
|
||||
try {
|
||||
// Get the source dataset to create proper reference - ensure it's published
|
||||
const sourceDataset = await Dataset.query()
|
||||
.where('id', missing.sourceDatasetId)
|
||||
.where('server_state', 'published')
|
||||
.preload('identifier')
|
||||
.first();
|
||||
|
||||
if (!sourceDataset) {
|
||||
this.logger.warning(`⚠️ Source dataset ${missing.sourceDatasetId} not found or not published, skipping...`);
|
||||
errorCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Create the reverse reference
|
||||
const reverseReference = new DatasetReference();
|
||||
reverseReference.document_id = missing.targetDatasetId;
|
||||
reverseReference.related_document_id = missing.sourceDatasetId;
|
||||
reverseReference.type = 'DOI';
|
||||
reverseReference.relation = missing.reverseRelation;
|
||||
|
||||
// Use the source dataset's DOI for the value
|
||||
if (sourceDataset.identifier?.value) {
|
||||
reverseReference.value = `https://doi.org/${sourceDataset.identifier.value}`;
|
||||
} else {
|
||||
// Fallback to dataset URL if no DOI
|
||||
reverseReference.value = `https://tethys.at/dataset/${sourceDataset.publish_id || missing.sourceDatasetId}`;
|
||||
}
|
||||
|
||||
// Use the source dataset's main title for the label
|
||||
reverseReference.label = sourceDataset.mainTitle || `Dataset ${missing.sourceDatasetId}`;
|
||||
|
||||
await reverseReference.save();
|
||||
fixedCount++;
|
||||
|
||||
if (this.verbose) {
|
||||
this.logger.info(
|
||||
`✅ [${index + 1}/${missingReferences.length}] Created reverse reference: Dataset ${missing.targetDatasetId} -> ${missing.sourceDatasetId}`,
|
||||
);
|
||||
} else if ((index + 1) % 10 === 0) {
|
||||
this.logger.info(`📈 Fixed ${fixedCount}/${missingReferences.length} references...`);
|
||||
}
|
||||
} catch (error) {
|
||||
this.logger.error(
|
||||
`❌ Error creating reverse reference for datasets ${missing.targetDatasetId} -> ${missing.sourceDatasetId}:`,
|
||||
error,
|
||||
);
|
||||
errorCount++;
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.info(`📊 Fix completed: ${fixedCount} created, ${errorCount} errors`);
|
||||
}
|
||||
}
|
||||
commands/update_datacite.ts (new file, 271 lines)

@@ -0,0 +1,271 @@
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| node ace make:command update-datacite
|
||||
| DONE: create commands/update_datacite.ts
|
||||
|--------------------------------------------------------------------------
|
||||
*/
|
||||
import { BaseCommand, flags } from '@adonisjs/core/ace';
|
||||
import { CommandOptions } from '@adonisjs/core/types/ace';
|
||||
import Dataset from '#models/dataset';
|
||||
import { DoiClient } from '#app/Library/Doi/DoiClient';
|
||||
import DoiClientException from '#app/exceptions/DoiClientException';
|
||||
import Index from '#app/Library/Utils/Index';
|
||||
import env from '#start/env';
|
||||
import logger from '@adonisjs/core/services/logger';
|
||||
import { DateTime } from 'luxon';
|
||||
import { getDomain } from '#app/utils/utility-functions';
|
||||
|
||||
export default class UpdateDatacite extends BaseCommand {
|
||||
static commandName = 'update:datacite';
|
||||
static description = 'Update DataCite DOI records for published datasets';
|
||||
|
||||
public static needsApplication = true;
|
||||
|
||||
@flags.number({ alias: 'p', description: 'Specific publish_id to update' })
|
||||
public publish_id: number;
|
||||
|
||||
@flags.boolean({ alias: 'f', description: 'Force update all records regardless of modification date' })
|
||||
public force: boolean = false;
|
||||
|
||||
@flags.boolean({ alias: 'd', description: 'Dry run - show what would be updated without making changes' })
|
||||
public dryRun: boolean = false;
|
||||
|
||||
@flags.boolean({ alias: 's', description: 'Show detailed stats for each dataset that needs updating' })
|
||||
public stats: boolean = false;
|
||||
|
||||
//example: node ace update:datacite -p 123 --force --dry-run
|
||||
|
||||
public static options: CommandOptions = {
|
||||
startApp: true, // Whether to boot the application before running the command
|
||||
staysAlive: false, // Whether to keep the process alive after the command has executed
|
||||
};
|
||||
|
||||
async run() {
|
||||
logger.info('Starting DataCite update process...');
|
||||
|
||||
const prefix = env.get('DATACITE_PREFIX', '');
|
||||
const base_domain = env.get('BASE_DOMAIN', '');
|
||||
const apiUrl = env.get('DATACITE_API_URL', 'https://api.datacite.org');
|
||||
|
||||
if (!prefix || !base_domain) {
|
||||
logger.error('Missing DATACITE_PREFIX or BASE_DOMAIN environment variables');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info(`Using DataCite API: ${apiUrl}`);
|
||||
|
||||
const datasets = await this.getDatasets();
|
||||
logger.info(`Found ${datasets.length} datasets to process`);
|
||||
|
||||
let updated = 0;
|
||||
let skipped = 0;
|
||||
let errors = 0;
|
||||
|
||||
for (const dataset of datasets) {
|
||||
try {
|
||||
const shouldUpdate = this.force || (await this.shouldUpdateDataset(dataset));
|
||||
|
||||
if (this.stats) {
|
||||
// Stats mode: show detailed information for datasets that need updating
|
||||
if (shouldUpdate) {
|
||||
await this.showDatasetStats(dataset);
|
||||
updated++;
|
||||
} else {
|
||||
skipped++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!shouldUpdate) {
|
||||
logger.info(`Dataset ${dataset.publish_id}: Up to date, skipping`);
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (this.dryRun) {
|
||||
logger.info(`Dataset ${dataset.publish_id}: Would update DataCite record (dry run)`);
|
||||
updated++;
|
||||
continue;
|
||||
}
|
||||
|
||||
await this.updateDataciteRecord(dataset, prefix, base_domain);
|
||||
logger.info(`Dataset ${dataset.publish_id}: Successfully updated DataCite record`);
|
||||
updated++;
|
||||
} catch (error) {
|
||||
logger.error(`Dataset ${dataset.publish_id}: Failed to update - ${error.message}`);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.stats) {
|
||||
logger.info(`\nDataCite Stats Summary: ${updated} datasets need updating, ${skipped} are up to date`);
|
||||
} else {
|
||||
logger.info(`DataCite update completed. Updated: ${updated}, Skipped: ${skipped}, Errors: ${errors}`);
|
||||
}
|
||||
}
|
||||
|
||||
private async getDatasets(): Promise<Dataset[]> {
|
||||
const query = Dataset.query()
|
||||
.preload('identifier')
|
||||
.preload('xmlCache')
|
||||
.where('server_state', 'published')
|
||||
.whereHas('identifier', (identifierQuery) => {
|
||||
identifierQuery.where('type', 'doi');
|
||||
});
|
||||
|
||||
if (this.publish_id) {
|
||||
query.where('publish_id', this.publish_id);
|
||||
}
|
||||
|
||||
return await query.exec();
|
||||
}
|
||||
|
||||
private async shouldUpdateDataset(dataset: Dataset): Promise<boolean> {
|
||||
try {
|
||||
// Check if dataset has a DOI identifier (HasOne relationship)
|
||||
let doiIdentifier = dataset.identifier;
|
||||
|
||||
if (!doiIdentifier) {
|
||||
// Try to load the relationship if not already loaded
|
||||
await dataset.load('identifier');
|
||||
doiIdentifier = dataset.identifier;
|
||||
}
|
||||
|
||||
if (!doiIdentifier || doiIdentifier.type !== 'doi') {
|
||||
logger.warn(`Dataset ${dataset.publish_id}: No DOI identifier found`);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Validate dataset modification date
|
||||
const datasetModified = dataset.server_date_modified;
|
||||
const now = DateTime.now();
|
||||
|
||||
if (!datasetModified) {
|
||||
logger.error(`Dataset ${dataset.publish_id}: server_date_modified is null or undefined`);
|
||||
return true; // Update anyway if modification date is missing
|
||||
}
|
||||
|
||||
if (datasetModified > now) {
|
||||
logger.error(
|
||||
`Dataset ${dataset.publish_id}: server_date_modified (${datasetModified.toISO()}) is in the future! ` +
|
||||
`Current time: ${now.toISO()}. This indicates a data integrity issue. Skipping update.`,
|
||||
);
|
||||
return false; // Do not update when modification date is invalid
|
||||
}
|
||||
|
||||
// Get DOI information from DataCite using DoiClient
|
||||
const doiClient = new DoiClient();
|
||||
const doiLastModified = await doiClient.getDoiLastModified(doiIdentifier.value);
|
||||
|
||||
if (!doiLastModified) {
|
||||
logger.warn(`Dataset ${dataset.publish_id}: Could not retrieve DOI modification date from DataCite`);
|
||||
return true; // Update anyway if we can't get DOI info
|
||||
}
|
||||
|
||||
// Compare dataset modification date with DOI modification date
|
||||
const doiModified = DateTime.fromJSDate(doiLastModified);
|
||||
|
||||
logger.debug(
|
||||
`Dataset ${dataset.publish_id}: Dataset modified: ${datasetModified.toISO()}, DOI modified: ${doiModified.toISO()}`,
|
||||
);
|
||||
|
||||
// Update if dataset was modified after the DOI record
|
||||
return datasetModified > doiModified;
|
||||
} catch (error) {
|
||||
logger.warn(`Error checking update status for dataset ${dataset.publish_id}: ${error.message}`);
|
||||
return true; // Update anyway if we can't determine status
|
||||
}
|
||||
}
|
||||
|
||||
private async updateDataciteRecord(dataset: Dataset, prefix: string, base_domain: string): Promise<void> {
|
||||
try {
|
||||
// Get the DOI identifier (HasOne relationship)
|
||||
let doiIdentifier = dataset.identifier;
|
||||
|
||||
if (!doiIdentifier) {
|
||||
await dataset.load('identifier');
|
||||
doiIdentifier = dataset.identifier;
|
||||
}
|
||||
|
||||
if (!doiIdentifier || doiIdentifier.type !== 'doi') {
|
||||
throw new Error('No DOI identifier found for dataset');
|
||||
}
|
||||
|
||||
// Generate XML metadata
|
||||
const xmlMeta = (await Index.getDoiRegisterString(dataset)) as string;
|
||||
if (!xmlMeta) {
|
||||
throw new Error('Failed to generate XML metadata');
|
||||
}
|
||||
|
||||
// Construct DOI value and landing page URL
|
||||
const doiValue = doiIdentifier.value; // Use existing DOI value
|
||||
const landingPageUrl = `https://doi.${getDomain(base_domain)}/${doiValue}`;
|
||||
|
||||
// Update DataCite record
|
||||
const doiClient = new DoiClient();
|
||||
const dataciteResponse = await doiClient.registerDoi(doiValue, xmlMeta, landingPageUrl);
|
||||
|
||||
if (dataciteResponse?.status === 201) {
|
||||
// // Update dataset modification date
|
||||
// dataset.server_date_modified = DateTime.now();
|
||||
// await dataset.save();
|
||||
|
||||
// // Update search index
|
||||
// const index_name = 'tethys-records';
|
||||
// await Index.indexDocument(dataset, index_name);
|
||||
|
||||
logger.debug(`Dataset ${dataset.publish_id}: DataCite record and search index updated successfully`);
|
||||
} else {
|
||||
throw new DoiClientException(
|
||||
dataciteResponse?.status || 500,
|
||||
`Unexpected DataCite response code: ${dataciteResponse?.status}`,
|
||||
);
|
||||
}
|
||||
} catch (error) {
|
||||
if (error instanceof DoiClientException) {
|
||||
throw error;
|
||||
}
|
||||
throw new Error(`Failed to update DataCite record: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Shows detailed statistics for a dataset that needs updating
|
||||
*/
|
||||
private async showDatasetStats(dataset: Dataset): Promise<void> {
|
||||
try {
|
||||
let doiIdentifier = dataset.identifier;
|
||||
|
||||
if (!doiIdentifier) {
|
||||
await dataset.load('identifier');
|
||||
doiIdentifier = dataset.identifier;
|
||||
}
|
||||
|
||||
const doiValue = doiIdentifier?.value || 'N/A';
|
||||
const doiStatus = doiIdentifier?.status || 'N/A';
|
||||
const datasetModified = dataset.server_date_modified;
|
||||
|
||||
// Get DOI info from DataCite
|
||||
const doiClient = new DoiClient();
|
||||
const doiLastModified = await doiClient.getDoiLastModified(doiValue);
|
||||
const doiState = await doiClient.getDoiState(doiValue);
|
||||
|
||||
console.log(`
|
||||
┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────
|
||||
│ DOI Value: ${doiValue}
|
||||
│ DOI Status (DB): ${doiStatus}
|
||||
│ DOI State (DataCite): ${doiState || 'Unknown'}
|
||||
│ Dataset Modified: ${datasetModified ? datasetModified.toISO() : 'N/A'}
|
||||
│ DOI Modified: ${doiLastModified ? DateTime.fromJSDate(doiLastModified).toISO() : 'N/A'}
|
||||
│ Needs Update: YES - Dataset newer than DOI
|
||||
└─────────────────────────────────────────────────────────────────────────────────────────────`);
|
||||
} catch (error) {
|
||||
console.log(`
|
||||
┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────
|
||||
│ DOI Value: ${dataset.identifier?.value || 'N/A'}
|
||||
│ Error: ${error.message}
|
||||
│ Needs Update: YES - Error checking status
|
||||
└─────────────────────────────────────────────────────────────────────────────────────────────`);
|
||||
}
|
||||
}
|
||||
}
|
||||
docs/commands/index-datasets.md (new file, 278 lines)

@@ -0,0 +1,278 @@

# Dataset Indexing Command

AdonisJS Ace command for indexing and synchronizing published datasets with OpenSearch for search functionality.

## Overview

The `index:datasets` command processes published datasets and creates/updates corresponding search index documents in OpenSearch. It intelligently compares modification timestamps to only re-index datasets when necessary, optimizing performance while maintaining search index accuracy.

## Command Syntax

```bash
node ace index:datasets [options]
```

## Options

| Flag | Alias | Description |
|------|-------|-------------|
| `--publish_id <number>` | `-p` | Index a specific dataset by publish_id |

## Usage Examples

### Basic Operations

```bash
# Index all published datasets that have been modified since last indexing
node ace index:datasets

# Index a specific dataset by publish_id
node ace index:datasets --publish_id 231
node ace index:datasets -p 231
```

## How It Works

### 1. **Dataset Selection**
The command processes datasets that meet these criteria (a query sketch follows this list):
- `server_state = 'published'` - Only published datasets
- Has preloaded `xmlCache` relationship for metadata transformation
- Optionally filtered by specific `publish_id`
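
The `index:datasets` implementation itself is not part of this diff; as a rough sketch, the selection query could look like the `getDatasets()` helper in `commands/update_datacite.ts` above (the function name and the optional `publishId` parameter here are illustrative):

```typescript
import Dataset from '#models/dataset';

// Sketch only: select published datasets together with their cached XML metadata,
// optionally narrowed to a single publish_id (mirrors getDatasets() in update_datacite.ts).
async function selectDatasetsForIndexing(publishId?: number): Promise<Dataset[]> {
    const query = Dataset.query()
        .preload('xmlCache') // needed later for the XML -> JSON transformation
        .where('server_state', 'published');

    if (publishId) {
        query.where('publish_id', publishId);
    }

    return await query.exec();
}
```
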
### 2. **Smart Update Detection**
For each dataset, the command:
- Checks if the dataset exists in the OpenSearch index
- Compares `server_date_modified` timestamps
- Only re-indexes if the dataset is newer than the indexed version

### 3. **Document Processing**
The indexing process involves:
1. **XML Generation**: Creates structured XML from dataset metadata
2. **XSLT Transformation**: Converts XML to JSON using Saxon-JS processor
3. **Index Update**: Updates or creates the document in OpenSearch
4. **Logging**: Records success/failure for each operation

## Index Structure

### Index Configuration
- **Index Name**: `tethys-records`
- **Document ID**: Dataset `publish_id`
- **Refresh**: `true` (immediate availability; see the indexing-call sketch below)
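
As a rough illustration of that configuration, an upsert with the OpenSearch JavaScript client might look like the sketch below (the client setup, package choice, and variable names are assumptions, not code from this commit):

```typescript
import { Client } from '@opensearch-project/opensearch';

// Sketch only: write one transformed dataset document into the search index
// using the configuration listed above.
const client = new Client({ node: `http://${process.env.OPENSEARCH_HOST}` });

async function indexDocument(publishId: number, jsonDoc: Record<string, unknown>): Promise<void> {
    await client.index({
        index: 'tethys-records', // Index Name
        id: String(publishId), // Document ID = dataset publish_id
        body: jsonDoc, // JSON produced by the XSLT transformation
        refresh: true, // immediate availability for search
    });
}
```
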
### Document Fields
The indexed documents contain:
- **Metadata Fields**: Title, description, authors, keywords
- **Identifiers**: DOI, publish_id, and other identifiers
- **Temporal Data**: Publication dates, coverage periods
- **Geographic Data**: Spatial coverage information
- **Technical Details**: Data formats, access information
- **Timestamps**: Creation and modification dates

## Example Output

### Successful Run
```bash
node ace index:datasets
```
```
Found 150 published datasets to process
Dataset with publish_id 231 successfully indexed
Dataset with publish_id 245 is up to date, skipping indexing
Dataset with publish_id 267 successfully indexed
An error occurred while indexing dataset with publish_id 289. Error: Invalid XML metadata
Processing completed: 148 indexed, 1 skipped, 1 error
```

### Specific Dataset
```bash
node ace index:datasets --publish_id 231
```
```
Found 1 published dataset to process
Dataset with publish_id 231 successfully indexed
Processing completed: 1 indexed, 0 skipped, 0 errors
```

## Update Logic

The command uses intelligent indexing to avoid unnecessary processing:

| Condition | Action | Reason |
|-----------|--------|--------|
| Dataset not in index | ✅ Index | New dataset needs indexing |
| Dataset newer than indexed version | ✅ Re-index | Dataset has been updated |
| Dataset same/older than indexed version | ❌ Skip | Already up to date |
| OpenSearch document check fails | ✅ Index | Better safe than sorry |
| Invalid XML metadata | ❌ Skip + Log Error | Cannot process invalid data |

### Timestamp Comparison
```typescript
// Example comparison logic
const existingModified = DateTime.fromMillis(Number(existingDoc.server_date_modified) * 1000);
const currentModified = dataset.server_date_modified;

if (currentModified <= existingModified) {
    // Skip - already up to date
    return false;
}
// Proceed with indexing
```

## XML Transformation Process

### 1. **XML Generation**
```xml
<?xml version="1.0" encoding="UTF-8" standalone="true"?>
<root>
    <Dataset>
        <!-- Dataset metadata fields -->
        <title>Research Dataset Title</title>
        <description>Dataset description...</description>
        <!-- Additional metadata -->
    </Dataset>
</root>
```

### 2. **XSLT Processing**
The command uses Saxon-JS with a compiled stylesheet (`solr.sef.json`) to transform XML to JSON:
```javascript
const result = await SaxonJS.transform({
    stylesheetText: proc,
    destination: 'serialized',
    sourceText: xmlString,
});
```

### 3. **Final JSON Document**
```json
{
    "id": "231",
    "title": "Research Dataset Title",
    "description": "Dataset description...",
    "authors": ["Author Name"],
    "server_date_modified": 1634567890,
    "publish_id": 231
}
```

## Configuration Requirements

### Environment Variables
```bash
# OpenSearch Configuration
OPENSEARCH_HOST=localhost:9200

# For production:
# OPENSEARCH_HOST=your-opensearch-cluster:9200
```

### Required Files
- **XSLT Stylesheet**: `public/assets2/solr.sef.json` - Compiled Saxon-JS stylesheet for XML transformation

### Database Relationships
The command expects these model relationships:
```typescript
// Dataset model must have:
@hasOne(() => XmlCache, { foreignKey: 'dataset_id' })
public xmlCache: HasOne<typeof XmlCache>
```

## Error Handling

The command handles various error scenarios gracefully:

### Common Errors and Solutions

| Error | Cause | Solution |
|-------|-------|----------|
| `XSLT transformation failed` | Invalid XML or missing stylesheet | Check XML structure and stylesheet path |
| `OpenSearch connection error` | Service unavailable | Verify OpenSearch is running and accessible |
| `JSON parse error` | Malformed transformation result | Check XSLT stylesheet output format |
| `Missing xmlCache relationship` | Data integrity issue | Ensure xmlCache exists for dataset |

### Error Logging
```bash
# Typical error log entry
An error occurred while indexing dataset with publish_id 231.
Error: XSLT transformation failed: Invalid XML structure at line 15
```

## Performance Considerations

### Batch Processing
- Processes datasets sequentially to avoid overwhelming OpenSearch
- Each dataset is committed individually for reliability
- Failed indexing of one dataset doesn't stop processing others

### Resource Usage
- **Memory**: XML/JSON transformations require temporary memory
- **Network**: OpenSearch API calls for each dataset
- **CPU**: XSLT transformations are CPU-intensive

### Optimization Tips
```bash
# Index only recently modified datasets (run regularly)
node ace index:datasets

# Index specific datasets when needed
node ace index:datasets --publish_id 231

# Consider running during off-peak hours for large batches
```

## Integration with Other Systems

### Search Functionality
The indexed documents power:
- **Dataset Search**: Full-text search across metadata
- **Faceted Browsing**: Filter by authors, keywords, dates
- **Geographic Search**: Spatial query capabilities
- **Auto-complete**: Suggest dataset titles and keywords

### Related Commands
- [`update:datacite`](update-datacite.md) - Often run after indexing to sync DOI metadata
- **Database migrations** - May require re-indexing after schema changes

### API Integration
The indexed data is consumed by:
- **Search API**: `/api/search` endpoints
- **Browse API**: `/api/datasets` with filtering
- **Recommendations**: Related dataset suggestions

## Monitoring and Maintenance

### Regular Tasks
```bash
# Daily indexing (recommended cron job)
0 2 * * * cd /path/to/project && node ace index:datasets

# Weekly full re-index (if needed)
0 3 * * 0 cd /path/to/project && node ace index:datasets --force
```

### Health Checks
- Monitor OpenSearch cluster health
- Check for failed indexing operations in logs
- Verify search functionality is working
- Compare dataset counts between database and index

### Troubleshooting
```bash
# Check specific dataset indexing
node ace index:datasets --publish_id 231

# Verify OpenSearch connectivity
curl -X GET "localhost:9200/_cluster/health"

# Check index statistics
curl -X GET "localhost:9200/tethys-records/_stats"
```

## Best Practices

1. **Regular Scheduling**: Run the command regularly (daily) to keep the search index current
2. **Monitor Logs**: Watch for transformation errors or OpenSearch issues
3. **Backup Strategy**: Include OpenSearch indices in backup procedures
4. **Resource Management**: Monitor OpenSearch cluster resources during bulk operations
5. **Testing**: Verify search functionality after major indexing operations
6. **Coordination**: Run indexing before DataCite updates when both are needed
216
docs/commands/update-datacite.md
Normal file
216
docs/commands/update-datacite.md
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
# DataCite Update Command
|
||||
|
||||
AdonisJS Ace command for updating DataCite DOI records for published datasets.
|
||||
|
||||
## Overview
|
||||
|
||||
The `update:datacite` command synchronizes your local dataset metadata with DataCite DOI records. It intelligently compares modification dates to only update records when necessary, reducing unnecessary API calls and maintaining data consistency.
|
||||
|
||||
## Command Syntax
|
||||
|
||||
```bash
|
||||
node ace update:datacite [options]
|
||||
```
|
||||
|
||||
## Options
|
||||
|
||||
| Flag | Alias | Description |
|
||||
|------|-------|-------------|
|
||||
| `--publish_id <number>` | `-p` | Update a specific dataset by publish_id |
|
||||
| `--force` | `-f` | Force update all records regardless of modification date |
|
||||
| `--dry-run` | `-d` | Preview what would be updated without making changes |
|
||||
| `--stats` | `-s` | Show detailed statistics for datasets that need updating |
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Basic Operations
|
||||
|
||||
```bash
|
||||
# Update all datasets that have been modified since their DOI was last updated
|
||||
node ace update:datacite
|
||||
|
||||
# Update a specific dataset
|
||||
node ace update:datacite --publish_id 231
|
||||
node ace update:datacite -p 231
|
||||
|
||||
# Force update all datasets with DOIs (ignores modification dates)
|
||||
node ace update:datacite --force
|
||||
```
|
||||
|
||||
### Preview and Analysis
|
||||
|
||||
```bash
|
||||
# Preview what would be updated (dry run)
|
||||
node ace update:datacite --dry-run
|
||||
|
||||
# Show detailed statistics for datasets that need updating
|
||||
node ace update:datacite --stats
|
||||
|
||||
# Show stats for a specific dataset
|
||||
node ace update:datacite --stats --publish_id 231
|
||||
```
|
||||
|
||||
### Combined Options
|
||||
|
||||
```bash
|
||||
# Dry run for a specific dataset
|
||||
node ace update:datacite --dry-run --publish_id 231
|
||||
|
||||
# Show stats for all datasets (including up-to-date ones)
|
||||
node ace update:datacite --stats --force
|
||||
```
|
||||
|
||||
## Command Modes

### 1. **Normal Mode** (Default)
Updates DataCite records for datasets that have been modified since their DOI was last updated.

**Example Output:**
```
Using DataCite API: https://api.test.datacite.org
Found 50 datasets to process
Dataset 231: Successfully updated DataCite record
Dataset 245: Up to date, skipping
Dataset 267: Successfully updated DataCite record
DataCite update completed. Updated: 15, Skipped: 35, Errors: 0
```

### 2. **Dry Run Mode** (`--dry-run`)
Shows what would be updated without making any changes to DataCite.

**Use Case:** Preview updates before running the actual command.

**Example Output:**
```
Dataset 231: Would update DataCite record (dry run)
Dataset 267: Would update DataCite record (dry run)
Dataset 245: Up to date, skipping
DataCite update completed. Updated: 2, Skipped: 1, Errors: 0
```

### 3. **Stats Mode** (`--stats`)
Shows detailed information for each dataset that needs updating, including why it needs updating.

**Use Case:** Debug synchronization issues, monitor dataset/DOI status, generate reports.

**Example Output:**
```
┌─ Dataset 231 ─────────────────────────────────────────────────────────
│ DOI Value: 10.21388/tethys.231
│ DOI Status (DB): findable
│ DOI State (DataCite): findable
│ Dataset Modified: 2024-09-15T10:30:00.000Z
│ DOI Modified: 2024-09-10T08:15:00.000Z
│ Needs Update: YES - Dataset newer than DOI
└───────────────────────────────────────────────────────────────────────

┌─ Dataset 267 ─────────────────────────────────────────────────────────
│ DOI Value: 10.21388/tethys.267
│ DOI Status (DB): findable
│ DOI State (DataCite): findable
│ Dataset Modified: 2024-09-18T14:20:00.000Z
│ DOI Modified: 2024-09-16T12:45:00.000Z
│ Needs Update: YES - Dataset newer than DOI
└───────────────────────────────────────────────────────────────────────

DataCite Stats Summary: 2 datasets need updating, 48 are up to date
```

## Update Logic

The command decides per dataset whether an update is needed:

1. **Compares modification dates**: Dataset `server_date_modified` vs the DOI's last modification date reported by DataCite
2. **Validates data integrity**: Checks for missing or future dates
3. **Handles API failures gracefully**: Updates anyway if DataCite information cannot be retrieved
4. **Uses a dual API approach**: DataCite REST API (primary) with MDS API fallback

### When Updates Happen

| Condition | Action | Reason |
|-----------|--------|--------|
| Dataset modified > DOI modified | ✅ Update | Dataset has newer changes |
| Dataset modified ≤ DOI modified | ❌ Skip | DOI is up to date |
| Dataset date in future | ❌ Skip | Invalid data, needs investigation |
| Dataset date missing | ✅ Update | Can't determine staleness |
| DataCite API error | ✅ Update | Safer to update than to miss a change |
| `--force` flag used | ✅ Update | Overrides all other checks |

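The rules above fit in a few lines. A hedged TypeScript sketch (illustrative only, not the command's actual source; the `DoiSyncState` type and function name are made up for this example):

```typescript
interface DoiSyncState {
  datasetModified: Date | null; // Dataset.server_date_modified
  doiModified: Date | null;     // last modification date reported by DataCite, if retrievable
  force: boolean;               // --force flag
}

export function needsDataCiteUpdate(state: DoiSyncState, now: Date = new Date()): { update: boolean; reason: string } {
  if (state.force) {
    return { update: true, reason: 'forced by --force flag' };
  }
  if (!state.datasetModified) {
    return { update: true, reason: 'dataset modification date missing, staleness unknown' };
  }
  if (state.datasetModified > now) {
    return { update: false, reason: 'dataset modification date is in the future, needs investigation' };
  }
  if (!state.doiModified) {
    return { update: true, reason: 'DataCite modification date unavailable, update to be safe' };
  }
  return state.datasetModified > state.doiModified
    ? { update: true, reason: 'dataset newer than DOI record' }
    : { update: false, reason: 'DOI record is up to date' };
}
```
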
## Environment Configuration

Required environment variables:

```bash
# DataCite Credentials
DATACITE_USERNAME=your_username
DATACITE_PASSWORD=your_password

# API Endpoints (test environment)
DATACITE_API_URL=https://api.test.datacite.org
DATACITE_SERVICE_URL=https://mds.test.datacite.org

# For production:
# DATACITE_API_URL=https://api.datacite.org
# DATACITE_SERVICE_URL=https://mds.datacite.org

# Project Configuration
DATACITE_PREFIX=10.21388    # Your DOI prefix
BASE_DOMAIN=tethys.at       # Your domain
```

## Error Handling

The command handles several error scenarios:

- **Invalid modification dates**: Logs errors but continues processing other datasets
- **DataCite API failures**: Falls back to the MDS API, then to a safe update (see the sketch after this list)
- **Missing DOI identifiers**: Skips datasets without DOI identifiers
- **Network issues**: Continues with the next dataset after logging the error

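A hedged sketch of the REST-first/MDS-fallback pattern described above (illustrative only; the function names are placeholders and not the project's actual `DoiClient` API):

```typescript
import axios from 'axios';

type RemoteDoiInfo = { updated: string | null };

// Primary path: the DataCite REST API exposes the record's `updated` timestamp.
async function fetchViaRestApi(doi: string): Promise<RemoteDoiInfo> {
  const response = await axios.get(`https://api.test.datacite.org/dois/${doi}`);
  return { updated: response.data?.data?.attributes?.updated ?? null };
}

// Fallback path: the MDS API can confirm the DOI exists, but does not return a
// modification timestamp, so the caller treats the record as possibly stale.
async function fetchViaMdsApi(doi: string): Promise<RemoteDoiInfo> {
  await axios.get(`https://mds.test.datacite.org/doi/${doi}`, {
    auth: {
      username: process.env.DATACITE_USERNAME ?? '',
      password: process.env.DATACITE_PASSWORD ?? '',
    },
  });
  return { updated: null };
}

export async function getRemoteDoiInfo(doi: string): Promise<RemoteDoiInfo | null> {
  try {
    return await fetchViaRestApi(doi);
  } catch {
    try {
      return await fetchViaMdsApi(doi);
    } catch {
      // Both APIs failed: return null so the caller updates anyway rather than miss a change.
      return null;
    }
  }
}
```
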
## Integration

The command integrates with:

- **Dataset Model**: Uses `server_date_modified` for change detection
- **DatasetIdentifier Model**: Reads DOI values and status
- **OpenSearch Index**: Updates the search index after a DataCite update
- **DoiClient**: Handles all DataCite API interactions

## Common Workflows

### Daily Maintenance
```bash
# Update any datasets modified today
node ace update:datacite
```

### Pre-Deployment Check
```bash
# Check what would be updated before deployment
node ace update:datacite --dry-run
```

### Debugging Sync Issues
```bash
# Investigate why a specific dataset isn't syncing
node ace update:datacite --stats --publish_id 231
```

### Full Resync
```bash
# Force update all DOI records (use with caution)
node ace update:datacite --force
```

### Monitoring Report
```bash
# Generate a sync status report
node ace update:datacite --stats > datacite-sync-report.txt
```

## Best Practices

1. **Regular Updates**: Run daily or after bulk dataset modifications
2. **Test First**: Use `--dry-run` or `--stats` before bulk operations
3. **Monitor Logs**: Check for data integrity warnings
4. **Environment Separation**: Use correct API URLs for test vs production
5. **Rate Limiting**: The command handles DataCite rate limits automatically
989
package-lock.json
generated
File diff suppressed because it is too large
174
readme.md
@@ -11,6 +11,8 @@ Welcome to the Tethys Research Repository Backend System! This is the backend co
- [Configuration](#configuration)
- [Database](#database)
- [API Documentation](#api-documentation)
- [Commands](#commands)
- [Documentation](#documentation)
- [Contributing](#contributing)
- [License](#license)

@@ -29,5 +31,175 @@ Before you begin, ensure you have met the following requirements:
1. Clone this repository:

```bash
git clone https://gitea.geologie.ac.at/geolba/tethys.backend.git
cd tethys.backend
```

2. Install dependencies:

```bash
npm install
```

3. Configure environment variables (see [Configuration](#configuration))

4. Run database migrations:

```bash
node ace migration:run
```

5. Start the development server:

```bash
npm run dev
```

## Usage

The Tethys Backend provides RESTful APIs for managing research datasets, user authentication, DOI registration, and search functionality.

## Configuration

Copy the `.env.example` file to `.env` and configure the following variables:

### Database Configuration
```bash
DB_CONNECTION=pg
DB_HOST=localhost
DB_PORT=5432
DB_USER=your_username
DB_PASSWORD=your_password
DB_DATABASE=tethys_db
```

### DataCite Configuration
```bash
# DataCite Credentials
DATACITE_USERNAME=your_datacite_username
DATACITE_PASSWORD=your_datacite_password
DATACITE_PREFIX=10.21388

# Environment-specific API endpoints
DATACITE_API_URL=https://api.test.datacite.org    # Test environment
DATACITE_SERVICE_URL=https://mds.test.datacite.org    # Test MDS

# For production:
# DATACITE_API_URL=https://api.datacite.org
# DATACITE_SERVICE_URL=https://mds.datacite.org
```

### OpenSearch Configuration
```bash
OPENSEARCH_HOST=localhost:9200
```

### Application Configuration
```bash
BASE_DOMAIN=tethys.at
APP_KEY=your_app_key
```

## Database

The system uses PostgreSQL with Lucid ORM. Key models include (a minimal query sketch follows the list):

- **Dataset**: Research dataset metadata
- **DatasetIdentifier**: DOI and other identifiers for datasets
- **User**: User management and authentication
- **XmlCache**: Cached XML metadata

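As a hedged illustration of how these models are used (not copied from the codebase; `publish_id` 231 is an arbitrary example value and the relation name is assumed):

```typescript
import Dataset from '#models/dataset'

// Load one published dataset together with its DOI identifier via Lucid's query builder
const dataset = await Dataset.query()
  .where('publish_id', 231)
  .preload('identifier')
  .firstOrFail()

console.log(dataset.serialize())
```
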
Run migrations and seeders:

```bash
# Run migrations
node ace migration:run

# Run seeders (if available)
node ace db:seed
```

## API Documentation

API endpoints are available for:

- Dataset management (`/api/datasets`, example request below)
- User authentication (`/api/auth`)
- DOI registration (`/api/doi`)
- Search functionality (`/api/search`)

*Detailed API documentation can be found in the `/docs/api` directory.*

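A minimal request against the dataset listing endpoint, assuming a local development server on AdonisJS's default port 3333:

```bash
curl http://localhost:3333/api/datasets
```
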
## Commands

The system includes several Ace commands for maintenance and data management:

### Dataset Indexing
```bash
# Index all published datasets to OpenSearch
node ace index:datasets

# Index a specific dataset
node ace index:datasets --publish_id 123
```

### DataCite DOI Management
```bash
# Update DataCite records for modified datasets
node ace update:datacite

# Show detailed statistics for datasets needing updates
node ace update:datacite --stats

# Preview what would be updated (dry run)
node ace update:datacite --dry-run

# Force update all DOI records
node ace update:datacite --force

# Update a specific dataset
node ace update:datacite --publish_id 123
```

*For detailed command documentation, see the [Commands Documentation](docs/commands/)*

## Documentation

Comprehensive documentation is available in the `/docs` directory:

- **[Commands Documentation](docs/commands/)** - Detailed guides for Ace commands
  - [DataCite Update Command](docs/commands/update-datacite.md) - DOI synchronization and management
  - [Dataset Indexing Command](docs/commands/index-datasets.md) - Search index management
- **[API Documentation](docs/api/)** - REST API endpoints and usage
- **[Deployment Guide](docs/deployment/)** - Production deployment instructions
- **[Configuration Guide](docs/configuration/)** - Environment setup and configuration options

## Contributing

1. Fork the repository
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
4. Push to the branch (`git push origin feature/amazing-feature`)
5. Open a Pull Request

### Development Guidelines

- Follow the existing code style and conventions
- Write tests for new features
- Update documentation for any API changes
- Ensure all commands and migrations work properly

### Testing Commands

```bash
# Run tests
npm test

# Test specific commands
node ace update:datacite --dry-run --publish_id 123
node ace index:datasets --publish_id 123
```

## License

This project is licensed under the [MIT License](LICENSE).
@@ -8,14 +8,24 @@ import AvatarController from '#controllers/Http/Api/AvatarController';
import UserController from '#controllers/Http/Api/UserController';
import CollectionsController from '#controllers/Http/Api/collections_controller';
import { middleware } from '../kernel.js';

// API

// Clean DOI URL routes (no /api prefix)

// API routes with /api prefix
router
    .group(() => {
        router.get('clients', [UserController, 'getSubmitters']).as('client.index').use(middleware.auth());
        router.get('authors', [AuthorsController, 'index']).as('author.index').use(middleware.auth());
        router.get('datasets', [DatasetController, 'index']).as('dataset.index');
        router.get('persons', [AuthorsController, 'persons']).as('author.persons');

        // This should come BEFORE any other routes that might conflict
        router
            .get('/dataset/:prefix/:value', [DatasetController, 'findByIdentifier'])
            .where('prefix', /^10\.\d+$/) // Match DOI prefix pattern (10.xxxx)
            .where('value', /^[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/) // Match DOI suffix pattern
            .as('dataset.findByIdentifier');
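        // Illustrative example (not part of the original diff): because this group is
        // registered with .prefix('api'), a request such as
        //   GET /api/dataset/10.21388/tethys.231
        // matches both patterns above and is handled by DatasetController.findByIdentifier.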

        router.get('/dataset', [DatasetController, 'findAll']).as('dataset.findAll');
        router.get('/dataset/:publish_id', [DatasetController, 'findOne']).as('dataset.findOne');
        router.get('/sitelinks/:year', [HomeController, 'findDocumentsPerYear']);

@@ -35,7 +45,7 @@ router
        .as('apps.twofactor_backupcodes.create')
        .use(middleware.auth());

        router.get('collections/:id', [CollectionsController, 'show']).as('collection.show');
    })
    // .namespace('App/Controllers/Http/Api')
    .prefix('api');

@@ -1,7 +1,7 @@
/*
|--------------------------------------------------------------------------
| Preloaded File - node ace make:preload rules/orcid
| Do you want to register the preload file in .adonisrc.ts file? (y/N) · true
| DONE: create start/rules/orcid.ts
| DONE: update adonisrc.ts file
|--------------------------------------------------------------------------
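
The hunk above only shows the preload file's banner comment. As a purely hypothetical illustration of what an ORCID format rule registered from `start/rules/orcid.ts` might look like (assuming the project validates with VineJS; this is not the file's actual contents):

```typescript
import vine from '@vinejs/vine'
import type { FieldContext } from '@vinejs/vine/types'

// ORCID iDs look like 0000-0002-1825-0097; the last character may be an X.
const ORCID_PATTERN = /^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$/

function orcid(value: unknown, _options: undefined, field: FieldContext) {
  if (typeof value !== 'string') {
    return
  }
  if (!ORCID_PATTERN.test(value)) {
    field.report('The {{ field }} field must be a valid ORCID iD', 'orcid', field)
  }
}

export const orcidRule = vine.createRule(orcid)
```
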